diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000000000000000000000000000000000000..16fc67aeb5a4d518bfb10508b582eec8af35f797 --- /dev/null +++ b/.gitignore @@ -0,0 +1,13 @@ +/dataset/ +/data/ +/.venv/ +__pycache__/ +outputs/models/best_vit_model_amp.pth +outputs/models/vit_mae_cataract_classifier.pth +logs/ +outputs/models/best_vit_model_balanced.pth +outputs/models/best_swin_model_final.pth +outputs/new_model/best_model_final_TTA.pth +outputs/new_model2/best_model_final_TTA_Focal.pth +outputs/new_model/best_model_final_TTA_Focal.pth +outputs/models/best_swin_weights_only.pth \ No newline at end of file diff --git a/.history/.gitignore_20250926211345 b/.history/.gitignore_20250926211345 new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/.history/.gitignore_20250926211429 b/.history/.gitignore_20250926211429 new file mode 100644 index 0000000000000000000000000000000000000000..6a725cb9363b4353a0cad6c436849c20a4ccdaf1 --- /dev/null +++ b/.history/.gitignore_20250926211429 @@ -0,0 +1 @@ +/dataset/ \ No newline at end of file diff --git a/.history/.gitignore_20250926211435 b/.history/.gitignore_20250926211435 new file mode 100644 index 0000000000000000000000000000000000000000..39a05ca01837e9a441c95f9e8c62efb7914a0bf9 --- /dev/null +++ b/.history/.gitignore_20250926211435 @@ -0,0 +1,2 @@ +/dataset/ +/data/ \ No newline at end of file diff --git a/.history/.gitignore_20250926211444 b/.history/.gitignore_20250926211444 new file mode 100644 index 0000000000000000000000000000000000000000..f473fdc002e52fe9c8b29c82bac1ec79f674dae5 --- /dev/null +++ b/.history/.gitignore_20250926211444 @@ -0,0 +1,3 @@ +/dataset/ +/data/ +/env \ No newline at end of file diff --git a/.history/.gitignore_20250926211446 b/.history/.gitignore_20250926211446 new file mode 100644 index 0000000000000000000000000000000000000000..361099c244c62acff6ba13f387c1cac2871c6162 --- /dev/null +++ 
b/.history/.gitignore_20250926211446 @@ -0,0 +1,3 @@ +/dataset/ +/data/ +/ \ No newline at end of file diff --git a/.history/.gitignore_20250926211448 b/.history/.gitignore_20250926211448 new file mode 100644 index 0000000000000000000000000000000000000000..caea3342510ed325c1ef072b9bce742e11be1370 --- /dev/null +++ b/.history/.gitignore_20250926211448 @@ -0,0 +1,3 @@ +/dataset/ +/data/ +/.venv \ No newline at end of file diff --git a/.history/.gitignore_20250926211450 b/.history/.gitignore_20250926211450 new file mode 100644 index 0000000000000000000000000000000000000000..263c337228096bb261da136a784fcac0d3b63b37 --- /dev/null +++ b/.history/.gitignore_20250926211450 @@ -0,0 +1,3 @@ +/dataset/ +/data/ +/.venv/ diff --git a/.history/.gitignore_20250926211454 b/.history/.gitignore_20250926211454 new file mode 100644 index 0000000000000000000000000000000000000000..aba1ed6b4873b4ddf3718b525785f6902e16a4ea --- /dev/null +++ b/.history/.gitignore_20250926211454 @@ -0,0 +1,4 @@ +/dataset/ +/data/ +/.venv/ +__pycache__/ \ No newline at end of file diff --git a/.history/.gitignore_20250926211510 b/.history/.gitignore_20250926211510 new file mode 100644 index 0000000000000000000000000000000000000000..aba1ed6b4873b4ddf3718b525785f6902e16a4ea --- /dev/null +++ b/.history/.gitignore_20250926211510 @@ -0,0 +1,4 @@ +/dataset/ +/data/ +/.venv/ +__pycache__/ \ No newline at end of file diff --git a/.history/.gitignore_20250926211619 b/.history/.gitignore_20250926211619 new file mode 100644 index 0000000000000000000000000000000000000000..f7fe6c6845660f5d12328365197d31dfe7799d4a --- /dev/null +++ b/.history/.gitignore_20250926211619 @@ -0,0 +1,5 @@ +/dataset/ +/data/ +/.venv/ +__pycache__/ +/models/ \ No newline at end of file diff --git a/.history/.gitignore_20250926211626 b/.history/.gitignore_20250926211626 new file mode 100644 index 0000000000000000000000000000000000000000..2b4f197e35c440d83c7aa6299e09eadb1e28bf36 --- /dev/null +++ b/.history/.gitignore_20250926211626 @@ -0,0 +1,5 
@@ +/dataset/ +/data/ +/.venv/ +__pycache__/ +/m \ No newline at end of file diff --git a/.history/.gitignore_20250926211628 b/.history/.gitignore_20250926211628 new file mode 100644 index 0000000000000000000000000000000000000000..bd099c2671748dd9bed56bbc669cf8517c3134d9 --- /dev/null +++ b/.history/.gitignore_20250926211628 @@ -0,0 +1,5 @@ +/dataset/ +/data/ +/.venv/ +__pycache__/ +/out \ No newline at end of file diff --git a/.history/.gitignore_20250926211631 b/.history/.gitignore_20250926211631 new file mode 100644 index 0000000000000000000000000000000000000000..8d7486a9505222e43b7b914a243fe25bb0a75c92 --- /dev/null +++ b/.history/.gitignore_20250926211631 @@ -0,0 +1,5 @@ +/dataset/ +/data/ +/.venv/ +__pycache__/ +/outputs \ No newline at end of file diff --git a/.history/.gitignore_20250926211635 b/.history/.gitignore_20250926211635 new file mode 100644 index 0000000000000000000000000000000000000000..57ea22ec16bf1df73111ff360c61bc4242b84550 --- /dev/null +++ b/.history/.gitignore_20250926211635 @@ -0,0 +1,4 @@ +/dataset/ +/data/ +/.venv/ +__pycache__/ diff --git a/.history/.gitignore_20250926211640 b/.history/.gitignore_20250926211640 new file mode 100644 index 0000000000000000000000000000000000000000..8124c39dce548bb381cfab536dc4e9b96c5190fd --- /dev/null +++ b/.history/.gitignore_20250926211640 @@ -0,0 +1,5 @@ +/dataset/ +/data/ +/.venv/ +__pycache__/ +C:\Users\user\Documents\Project\Cataract-ViT\outputs\models\best_vit_model_amp.pth \ No newline at end of file diff --git a/.history/.gitignore_20250926211646 b/.history/.gitignore_20250926211646 new file mode 100644 index 0000000000000000000000000000000000000000..549f9ed5bf19f20e032b703f81c406e06289b403 --- /dev/null +++ b/.history/.gitignore_20250926211646 @@ -0,0 +1,5 @@ +/dataset/ +/data/ +/.venv/ +__pycache__/ +outputs\models\best_vit_model_amp.pth \ No newline at end of file diff --git a/.history/.gitignore_20250926211648 b/.history/.gitignore_20250926211648 new file mode 100644 index 
0000000000000000000000000000000000000000..3f4cf328431601a32d265ae412b9cf7e167f3ff4 --- /dev/null +++ b/.history/.gitignore_20250926211648 @@ -0,0 +1,5 @@ +/dataset/ +/data/ +/.venv/ +__pycache__/ +outputs/models\best_vit_model_amp.pth \ No newline at end of file diff --git a/.history/.gitignore_20250926211650 b/.history/.gitignore_20250926211650 new file mode 100644 index 0000000000000000000000000000000000000000..5b22cdbe02ec6f1fc5b1b13d0f305c9394bbef41 --- /dev/null +++ b/.history/.gitignore_20250926211650 @@ -0,0 +1,5 @@ +/dataset/ +/data/ +/.venv/ +__pycache__/ +outputs/modelsbest_vit_model_amp.pth \ No newline at end of file diff --git a/.history/.gitignore_20250926211651 b/.history/.gitignore_20250926211651 new file mode 100644 index 0000000000000000000000000000000000000000..1fc17354bf8a1493c88e40b1bde8ed3f8741afe6 --- /dev/null +++ b/.history/.gitignore_20250926211651 @@ -0,0 +1,5 @@ +/dataset/ +/data/ +/.venv/ +__pycache__/ +outputs/models/best_vit_model_amp.pth \ No newline at end of file diff --git a/.history/.gitignore_20250926211658 b/.history/.gitignore_20250926211658 new file mode 100644 index 0000000000000000000000000000000000000000..a6f853902f606776f490bd4a33f580363dc7adf1 --- /dev/null +++ b/.history/.gitignore_20250926211658 @@ -0,0 +1,6 @@ +/dataset/ +/data/ +/.venv/ +__pycache__/ +outputs/models/best_vit_model_amp.pth +outputs/models/ \ No newline at end of file diff --git a/.history/.gitignore_20250926211703 b/.history/.gitignore_20250926211703 new file mode 100644 index 0000000000000000000000000000000000000000..b92bb5fbf575c8514c0546a429afa013aee97f54 --- /dev/null +++ b/.history/.gitignore_20250926211703 @@ -0,0 +1,6 @@ +/dataset/ +/data/ +/.venv/ +__pycache__/ +outputs/models/best_vit_model_amp.pth +outputs/models/best_vit_model.pth \ No newline at end of file diff --git a/.history/.gitignore_20250926211707 b/.history/.gitignore_20250926211707 new file mode 100644 index 
0000000000000000000000000000000000000000..4057a18b5a117127ceab6446944ef1203732836d --- /dev/null +++ b/.history/.gitignore_20250926211707 @@ -0,0 +1,6 @@ +/dataset/ +/data/ +/.venv/ +__pycache__/ +outputs/models/best_vit_model_amp.pth +outputs/mode \ No newline at end of file diff --git a/.history/.gitignore_20250926211710 b/.history/.gitignore_20250926211710 new file mode 100644 index 0000000000000000000000000000000000000000..a6f853902f606776f490bd4a33f580363dc7adf1 --- /dev/null +++ b/.history/.gitignore_20250926211710 @@ -0,0 +1,6 @@ +/dataset/ +/data/ +/.venv/ +__pycache__/ +outputs/models/best_vit_model_amp.pth +outputs/models/ \ No newline at end of file diff --git a/.history/.gitignore_20250926211712 b/.history/.gitignore_20250926211712 new file mode 100644 index 0000000000000000000000000000000000000000..4d50a3d516fc9625c1a314480f42bf0d872ca666 --- /dev/null +++ b/.history/.gitignore_20250926211712 @@ -0,0 +1,6 @@ +/dataset/ +/data/ +/.venv/ +__pycache__/ +outputs/models/best_vit_model_amp.pth +outputs/models/vit \ No newline at end of file diff --git a/.history/.gitignore_20250926211716 b/.history/.gitignore_20250926211716 new file mode 100644 index 0000000000000000000000000000000000000000..1a7504d0607a547fb6f8517ed7f299433fda3832 --- /dev/null +++ b/.history/.gitignore_20250926211716 @@ -0,0 +1,6 @@ +/dataset/ +/data/ +/.venv/ +__pycache__/ +outputs/models/best_vit_model_amp.pth +outputs/models/vit_mae \ No newline at end of file diff --git a/.history/.gitignore_20250926211719 b/.history/.gitignore_20250926211719 new file mode 100644 index 0000000000000000000000000000000000000000..107750a13c91f3fbb81ca5daa4deeddc9f1c91c3 --- /dev/null +++ b/.history/.gitignore_20250926211719 @@ -0,0 +1,6 @@ +/dataset/ +/data/ +/.venv/ +__pycache__/ +outputs/models/best_vit_model_amp.pth +outputs/models/vit_mae_cataract \ No newline at end of file diff --git a/.history/.gitignore_20250926211721 b/.history/.gitignore_20250926211721 new file mode 100644 index 
0000000000000000000000000000000000000000..0a2d62c6575b8ccd092f19fc5185a9862821fd57 --- /dev/null +++ b/.history/.gitignore_20250926211721 @@ -0,0 +1,6 @@ +/dataset/ +/data/ +/.venv/ +__pycache__/ +outputs/models/best_vit_model_amp.pth +outputs/models/vit_mae_cataract_ \ No newline at end of file diff --git a/.history/.gitignore_20250926211726 b/.history/.gitignore_20250926211726 new file mode 100644 index 0000000000000000000000000000000000000000..46d668a3e238e4df4c2f30633f793dd4ed2f5fda --- /dev/null +++ b/.history/.gitignore_20250926211726 @@ -0,0 +1,6 @@ +/dataset/ +/data/ +/.venv/ +__pycache__/ +outputs/models/best_vit_model_amp.pth +outputs/models/vit_mae_cataract_classifier. \ No newline at end of file diff --git a/.history/.gitignore_20250926211728 b/.history/.gitignore_20250926211728 new file mode 100644 index 0000000000000000000000000000000000000000..45522313d252c125898dea25d7e30b20270c1d35 --- /dev/null +++ b/.history/.gitignore_20250926211728 @@ -0,0 +1,6 @@ +/dataset/ +/data/ +/.venv/ +__pycache__/ +outputs/models/best_vit_model_amp.pth +outputs/models/vit_mae_cataract_classifier.pth \ No newline at end of file diff --git a/.history/.gitignore_20250926211733 b/.history/.gitignore_20250926211733 new file mode 100644 index 0000000000000000000000000000000000000000..45522313d252c125898dea25d7e30b20270c1d35 --- /dev/null +++ b/.history/.gitignore_20250926211733 @@ -0,0 +1,6 @@ +/dataset/ +/data/ +/.venv/ +__pycache__/ +outputs/models/best_vit_model_amp.pth +outputs/models/vit_mae_cataract_classifier.pth \ No newline at end of file diff --git a/.history/.gitignore_20250926211734 b/.history/.gitignore_20250926211734 new file mode 100644 index 0000000000000000000000000000000000000000..45522313d252c125898dea25d7e30b20270c1d35 --- /dev/null +++ b/.history/.gitignore_20250926211734 @@ -0,0 +1,6 @@ +/dataset/ +/data/ +/.venv/ +__pycache__/ +outputs/models/best_vit_model_amp.pth +outputs/models/vit_mae_cataract_classifier.pth \ No newline at end of file diff --git 
a/.history/.gitignore_20250926211746 b/.history/.gitignore_20250926211746 new file mode 100644 index 0000000000000000000000000000000000000000..ded79c1c03c341222137962ada210e4e1829e751 --- /dev/null +++ b/.history/.gitignore_20250926211746 @@ -0,0 +1,6 @@ +/dataset/ +/data/ +/.venv/ +__pycache__/ +outputs/models/best_vit_model_amp.pth +outputs/models/vit_mae_cataract_classifier.pth diff --git a/.history/.gitignore_20250926211748 b/.history/.gitignore_20250926211748 new file mode 100644 index 0000000000000000000000000000000000000000..1fb6e326f500a2f3b5e2f7030579409e9090216f --- /dev/null +++ b/.history/.gitignore_20250926211748 @@ -0,0 +1,7 @@ +/dataset/ +/data/ +/.venv/ +__pycache__/ +outputs/models/best_vit_model_amp.pth +outputs/models/vit_mae_cataract_classifier.pth +lo \ No newline at end of file diff --git a/.history/.gitignore_20250926211750 b/.history/.gitignore_20250926211750 new file mode 100644 index 0000000000000000000000000000000000000000..da02a2f282b91d19c0bac1df1f5fed3c98e186f8 --- /dev/null +++ b/.history/.gitignore_20250926211750 @@ -0,0 +1,7 @@ +/dataset/ +/data/ +/.venv/ +__pycache__/ +outputs/models/best_vit_model_amp.pth +outputs/models/vit_mae_cataract_classifier.pth +logs/ diff --git a/.history/.gitignore_20250926211753 b/.history/.gitignore_20250926211753 new file mode 100644 index 0000000000000000000000000000000000000000..da02a2f282b91d19c0bac1df1f5fed3c98e186f8 --- /dev/null +++ b/.history/.gitignore_20250926211753 @@ -0,0 +1,7 @@ +/dataset/ +/data/ +/.venv/ +__pycache__/ +outputs/models/best_vit_model_amp.pth +outputs/models/vit_mae_cataract_classifier.pth +logs/ diff --git a/.history/.gitignore_20250927155451 b/.history/.gitignore_20250927155451 new file mode 100644 index 0000000000000000000000000000000000000000..71e011966af433ecad933a34945f964e03395e67 --- /dev/null +++ b/.history/.gitignore_20250927155451 @@ -0,0 +1,8 @@ +/dataset/ +/data/ +/.venv/ +__pycache__/ +outputs/models/best_vit_model_amp.pth 
+outputs/models/vit_mae_cataract_classifier.pth +logs/ +C:\Users\user\Documents\Project\Cataract-ViT\outputs\models\best_vit_model_balanced.pth \ No newline at end of file diff --git a/.history/.gitignore_20250927155455 b/.history/.gitignore_20250927155455 new file mode 100644 index 0000000000000000000000000000000000000000..8454a3f7a8d558f10134071a3a5b29262f6ed282 --- /dev/null +++ b/.history/.gitignore_20250927155455 @@ -0,0 +1,8 @@ +/dataset/ +/data/ +/.venv/ +__pycache__/ +outputs/models/best_vit_model_amp.pth +outputs/models/vit_mae_cataract_classifier.pth +logs/ +outputs\models\best_vit_model_balanced.pth \ No newline at end of file diff --git a/.history/.gitignore_20250927155458 b/.history/.gitignore_20250927155458 new file mode 100644 index 0000000000000000000000000000000000000000..85c33d554fdc244b825ea2891240c7ec0f1033d6 --- /dev/null +++ b/.history/.gitignore_20250927155458 @@ -0,0 +1,8 @@ +/dataset/ +/data/ +/.venv/ +__pycache__/ +outputs/models/best_vit_model_amp.pth +outputs/models/vit_mae_cataract_classifier.pth +logs/ +outputs/models\best_vit_model_balanced.pth \ No newline at end of file diff --git a/.history/.gitignore_20250927155500 b/.history/.gitignore_20250927155500 new file mode 100644 index 0000000000000000000000000000000000000000..900903a3d9b97d138dfed978ecf5b28020d51faa --- /dev/null +++ b/.history/.gitignore_20250927155500 @@ -0,0 +1,8 @@ +/dataset/ +/data/ +/.venv/ +__pycache__/ +outputs/models/best_vit_model_amp.pth +outputs/models/vit_mae_cataract_classifier.pth +logs/ +outputs/models/best_vit_model_balanced.pth \ No newline at end of file diff --git a/.history/.gitignore_20250927204835 b/.history/.gitignore_20250927204835 new file mode 100644 index 0000000000000000000000000000000000000000..2f9eb0f88ca0b4beab6044b968175055e76800ba --- /dev/null +++ b/.history/.gitignore_20250927204835 @@ -0,0 +1,9 @@ +/dataset/ +/data/ +/.venv/ +__pycache__/ +outputs/models/best_vit_model_amp.pth +outputs/models/vit_mae_cataract_classifier.pth +logs/ 
+outputs/models/best_vit_model_balanced.pth +outputs \ No newline at end of file diff --git a/.history/.gitignore_20250927204839 b/.history/.gitignore_20250927204839 new file mode 100644 index 0000000000000000000000000000000000000000..9dab3c664f4d294d275125ead9e9c4d1fa734969 --- /dev/null +++ b/.history/.gitignore_20250927204839 @@ -0,0 +1,9 @@ +/dataset/ +/data/ +/.venv/ +__pycache__/ +outputs/models/best_vit_model_amp.pth +outputs/models/vit_mae_cataract_classifier.pth +logs/ +outputs/models/best_vit_model_balanced.pth +outputs/models/ \ No newline at end of file diff --git a/.history/.gitignore_20250927204842 b/.history/.gitignore_20250927204842 new file mode 100644 index 0000000000000000000000000000000000000000..3b258e235223035856b4d39ec7e9332467e0a2d8 --- /dev/null +++ b/.history/.gitignore_20250927204842 @@ -0,0 +1,9 @@ +/dataset/ +/data/ +/.venv/ +__pycache__/ +outputs/models/best_vit_model_amp.pth +outputs/models/vit_mae_cataract_classifier.pth +logs/ +outputs/models/best_vit_model_balanced.pth +outputs/models/best \ No newline at end of file diff --git a/.history/.gitignore_20250927204844 b/.history/.gitignore_20250927204844 new file mode 100644 index 0000000000000000000000000000000000000000..e7b56de791cad2c0d10b548968cdac9e82d55794 --- /dev/null +++ b/.history/.gitignore_20250927204844 @@ -0,0 +1,9 @@ +/dataset/ +/data/ +/.venv/ +__pycache__/ +outputs/models/best_vit_model_amp.pth +outputs/models/vit_mae_cataract_classifier.pth +logs/ +outputs/models/best_vit_model_balanced.pth +outputs/models/best_ \ No newline at end of file diff --git a/.history/.gitignore_20250927204847 b/.history/.gitignore_20250927204847 new file mode 100644 index 0000000000000000000000000000000000000000..87d8859f6ecd74eebe208ce3e644a5d8fad30299 --- /dev/null +++ b/.history/.gitignore_20250927204847 @@ -0,0 +1,9 @@ +/dataset/ +/data/ +/.venv/ +__pycache__/ +outputs/models/best_vit_model_amp.pth +outputs/models/vit_mae_cataract_classifier.pth +logs/ 
+outputs/models/best_vit_model_balanced.pth +outputs/models/best_swim \ No newline at end of file diff --git a/.history/.gitignore_20250927204849 b/.history/.gitignore_20250927204849 new file mode 100644 index 0000000000000000000000000000000000000000..f8323d5fcc9d149cfbe197fda023d60e90d90374 --- /dev/null +++ b/.history/.gitignore_20250927204849 @@ -0,0 +1,9 @@ +/dataset/ +/data/ +/.venv/ +__pycache__/ +outputs/models/best_vit_model_amp.pth +outputs/models/vit_mae_cataract_classifier.pth +logs/ +outputs/models/best_vit_model_balanced.pth +outputs/models/best_swimm_model.pth \ No newline at end of file diff --git a/.history/.gitignore_20250927204854 b/.history/.gitignore_20250927204854 new file mode 100644 index 0000000000000000000000000000000000000000..8e8348ff2bea2dc043af266ba2e37b621413dc6f --- /dev/null +++ b/.history/.gitignore_20250927204854 @@ -0,0 +1,9 @@ +/dataset/ +/data/ +/.venv/ +__pycache__/ +outputs/models/best_vit_model_amp.pth +outputs/models/vit_mae_cataract_classifier.pth +logs/ +outputs/models/best_vit_model_balanced.pth +outputs/models/best_swin_model.pth \ No newline at end of file diff --git a/.history/.gitignore_20250927204913 b/.history/.gitignore_20250927204913 new file mode 100644 index 0000000000000000000000000000000000000000..764c57d0fcdd44dcf06eea57b1d58475c441bfcb --- /dev/null +++ b/.history/.gitignore_20250927204913 @@ -0,0 +1,9 @@ +/dataset/ +/data/ +/.venv/ +__pycache__/ +outputs/models/best_vit_model_amp.pth +outputs/models/vit_mae_cataract_classifier.pth +logs/ +outputs/models/best_vit_model_balanced.pth +outputs/models/best_swin_model_final.pth \ No newline at end of file diff --git a/.history/.gitignore_20250930185922 b/.history/.gitignore_20250930185922 new file mode 100644 index 0000000000000000000000000000000000000000..f85014e42d70f9e887c1a96edbee893e89e5e9eb --- /dev/null +++ b/.history/.gitignore_20250930185922 @@ -0,0 +1,10 @@ +/dataset/ +/data/ +/.venv/ +__pycache__/ +outputs/models/best_vit_model_amp.pth 
+outputs/models/vit_mae_cataract_classifier.pth +logs/ +outputs/models/best_vit_model_balanced.pth +outputs/models/best_swin_model_final.pth +outp \ No newline at end of file diff --git a/.history/.gitignore_20250930185923 b/.history/.gitignore_20250930185923 new file mode 100644 index 0000000000000000000000000000000000000000..def0dffeba7e73d5545d102247717c3571d2f1de --- /dev/null +++ b/.history/.gitignore_20250930185923 @@ -0,0 +1,10 @@ +/dataset/ +/data/ +/.venv/ +__pycache__/ +outputs/models/best_vit_model_amp.pth +outputs/models/vit_mae_cataract_classifier.pth +logs/ +outputs/models/best_vit_model_balanced.pth +outputs/models/best_swin_model_final.pth +outputs/models/swin_mae_cataract_classifier.pth \ No newline at end of file diff --git a/.history/.gitignore_20250930185932 b/.history/.gitignore_20250930185932 new file mode 100644 index 0000000000000000000000000000000000000000..5810eeb10a12dcc645f8a864fa74d5abc652606c --- /dev/null +++ b/.history/.gitignore_20250930185932 @@ -0,0 +1,10 @@ +/dataset/ +/data/ +/.venv/ +__pycache__/ +outputs/models/best_vit_model_amp.pth +outputs/models/vit_mae_cataract_classifier.pth +logs/ +outputs/models/best_vit_model_balanced.pth +outputs/models/best_swin_model_final.pth +outputs/new_models/swin_mae_cataract_classifier.pth \ No newline at end of file diff --git a/.history/.gitignore_20250930185934 b/.history/.gitignore_20250930185934 new file mode 100644 index 0000000000000000000000000000000000000000..766c0f079065fc915fb5ca6877abc7e583acb754 --- /dev/null +++ b/.history/.gitignore_20250930185934 @@ -0,0 +1,10 @@ +/dataset/ +/data/ +/.venv/ +__pycache__/ +outputs/models/best_vit_model_amp.pth +outputs/models/vit_mae_cataract_classifier.pth +logs/ +outputs/models/best_vit_model_balanced.pth +outputs/models/best_swin_model_final.pth +outputs/new_model/swin_mae_cataract_classifier.pth \ No newline at end of file diff --git a/.history/.gitignore_20250930185940 b/.history/.gitignore_20250930185940 new file mode 100644 index 
0000000000000000000000000000000000000000..b601226b3b5c9361ffe14eaf3fdf99f54bb5f140 --- /dev/null +++ b/.history/.gitignore_20250930185940 @@ -0,0 +1,10 @@ +/dataset/ +/data/ +/.venv/ +__pycache__/ +outputs/models/best_vit_model_amp.pth +outputs/models/vit_mae_cataract_classifier.pth +logs/ +outputs/models/best_vit_model_balanced.pth +outputs/models/best_swin_model_final.pth +outputs/new_model/best.pth \ No newline at end of file diff --git a/.history/.gitignore_20250930185946 b/.history/.gitignore_20250930185946 new file mode 100644 index 0000000000000000000000000000000000000000..0ef6361566dd4d156e735d5acd9c48cbee81560e --- /dev/null +++ b/.history/.gitignore_20250930185946 @@ -0,0 +1,10 @@ +/dataset/ +/data/ +/.venv/ +__pycache__/ +outputs/models/best_vit_model_amp.pth +outputs/models/vit_mae_cataract_classifier.pth +logs/ +outputs/models/best_vit_model_balanced.pth +outputs/models/best_swin_model_final.pth +outputs/new_model/best_model_final_TTA.pth \ No newline at end of file diff --git a/.history/.gitignore_20251002092158 b/.history/.gitignore_20251002092158 new file mode 100644 index 0000000000000000000000000000000000000000..75c2d555fbe3b6979ed2497b97b3a10e3ae96ca3 --- /dev/null +++ b/.history/.gitignore_20251002092158 @@ -0,0 +1,11 @@ +/dataset/ +/data/ +/.venv/ +__pycache__/ +outputs/models/best_vit_model_amp.pth +outputs/models/vit_mae_cataract_classifier.pth +logs/ +outputs/models/best_vit_model_balanced.pth +outputs/models/best_swin_model_final.pth +outputs/new_model/best_model_final_TTA.pth +outputs/ \ No newline at end of file diff --git a/.history/.gitignore_20251002092159 b/.history/.gitignore_20251002092159 new file mode 100644 index 0000000000000000000000000000000000000000..6126faef851a345e106d8813be79b86b60153b8a --- /dev/null +++ b/.history/.gitignore_20251002092159 @@ -0,0 +1,11 @@ +/dataset/ +/data/ +/.venv/ +__pycache__/ +outputs/models/best_vit_model_amp.pth +outputs/models/vit_mae_cataract_classifier.pth +logs/ 
+outputs/models/best_vit_model_balanced.pth +outputs/models/best_swin_model_final.pth +outputs/new_model/best_model_final_TTA.pth +outputs/new \ No newline at end of file diff --git a/.history/.gitignore_20251002092203 b/.history/.gitignore_20251002092203 new file mode 100644 index 0000000000000000000000000000000000000000..986d38cf91a83983327522cccedaebae32238a23 --- /dev/null +++ b/.history/.gitignore_20251002092203 @@ -0,0 +1,11 @@ +/dataset/ +/data/ +/.venv/ +__pycache__/ +outputs/models/best_vit_model_amp.pth +outputs/models/vit_mae_cataract_classifier.pth +logs/ +outputs/models/best_vit_model_balanced.pth +outputs/models/best_swin_model_final.pth +outputs/new_model/best_model_final_TTA.pth +outputs/new_model2 \ No newline at end of file diff --git a/.history/.gitignore_20251002092205 b/.history/.gitignore_20251002092205 new file mode 100644 index 0000000000000000000000000000000000000000..ce3c5cc97168ce9a370576e710c7f1b5718dbd8b --- /dev/null +++ b/.history/.gitignore_20251002092205 @@ -0,0 +1,11 @@ +/dataset/ +/data/ +/.venv/ +__pycache__/ +outputs/models/best_vit_model_amp.pth +outputs/models/vit_mae_cataract_classifier.pth +logs/ +outputs/models/best_vit_model_balanced.pth +outputs/models/best_swin_model_final.pth +outputs/new_model/best_model_final_TTA.pth +outputs/new_model2/ \ No newline at end of file diff --git a/.history/.gitignore_20251002092209 b/.history/.gitignore_20251002092209 new file mode 100644 index 0000000000000000000000000000000000000000..97060288582e7308d43c02bec3f4f6f2c46bfb1b --- /dev/null +++ b/.history/.gitignore_20251002092209 @@ -0,0 +1,11 @@ +/dataset/ +/data/ +/.venv/ +__pycache__/ +outputs/models/best_vit_model_amp.pth +outputs/models/vit_mae_cataract_classifier.pth +logs/ +outputs/models/best_vit_model_balanced.pth +outputs/models/best_swin_model_final.pth +outputs/new_model/best_model_final_TTA.pth +outputs/new_model2/best_model_final_TTA.pth \ No newline at end of file diff --git a/.history/.gitignore_20251002092213 
b/.history/.gitignore_20251002092213 new file mode 100644 index 0000000000000000000000000000000000000000..1eef83e119ca9aa201d94235dd1753aeca8be0d0 --- /dev/null +++ b/.history/.gitignore_20251002092213 @@ -0,0 +1,11 @@ +/dataset/ +/data/ +/.venv/ +__pycache__/ +outputs/models/best_vit_model_amp.pth +outputs/models/vit_mae_cataract_classifier.pth +logs/ +outputs/models/best_vit_model_balanced.pth +outputs/models/best_swin_model_final.pth +outputs/new_model/best_model_final_TTA.pth +outputs/new_model2/best_model_final_local.pth \ No newline at end of file diff --git a/.history/.gitignore_20251002092218 b/.history/.gitignore_20251002092218 new file mode 100644 index 0000000000000000000000000000000000000000..46c09c83f5d9eab643c0b9b826905269f4e39918 --- /dev/null +++ b/.history/.gitignore_20251002092218 @@ -0,0 +1,12 @@ +/dataset/ +/data/ +/.venv/ +__pycache__/ +outputs/models/best_vit_model_amp.pth +outputs/models/vit_mae_cataract_classifier.pth +logs/ +outputs/models/best_vit_model_balanced.pth +outputs/models/best_swin_model_final.pth +outputs/new_model/best_model_final_TTA.pth +outputs/new_model2/best_model_final_l +ocal.pth \ No newline at end of file diff --git a/.history/.gitignore_20251002092219 b/.history/.gitignore_20251002092219 new file mode 100644 index 0000000000000000000000000000000000000000..1eef83e119ca9aa201d94235dd1753aeca8be0d0 --- /dev/null +++ b/.history/.gitignore_20251002092219 @@ -0,0 +1,11 @@ +/dataset/ +/data/ +/.venv/ +__pycache__/ +outputs/models/best_vit_model_amp.pth +outputs/models/vit_mae_cataract_classifier.pth +logs/ +outputs/models/best_vit_model_balanced.pth +outputs/models/best_swin_model_final.pth +outputs/new_model/best_model_final_TTA.pth +outputs/new_model2/best_model_final_local.pth \ No newline at end of file diff --git a/.history/.gitignore_20251002092221 b/.history/.gitignore_20251002092221 new file mode 100644 index 0000000000000000000000000000000000000000..a9acdc92a5843904ff3b1bb6dfe85b62b2658d93 --- /dev/null +++ 
b/.history/.gitignore_20251002092221 @@ -0,0 +1,11 @@ +/dataset/ +/data/ +/.venv/ +__pycache__/ +outputs/models/best_vit_model_amp.pth +outputs/models/vit_mae_cataract_classifier.pth +logs/ +outputs/models/best_vit_model_balanced.pth +outputs/models/best_swin_model_final.pth +outputs/new_model/best_model_final_TTA.pth +outputs/new_model2/best_model_final_Focal.pth \ No newline at end of file diff --git a/.history/.gitignore_20251002092235 b/.history/.gitignore_20251002092235 new file mode 100644 index 0000000000000000000000000000000000000000..5d222dacb4119cd3d98084dc5e6364c25b30e088 --- /dev/null +++ b/.history/.gitignore_20251002092235 @@ -0,0 +1,11 @@ +/dataset/ +/data/ +/.venv/ +__pycache__/ +outputs/models/best_vit_model_amp.pth +outputs/models/vit_mae_cataract_classifier.pth +logs/ +outputs/models/best_vit_model_balanced.pth +outputs/models/best_swin_model_final.pth +outputs/new_model/best_model_final_TTA.pth +outputs/new_model2/best_model_final_TTA_Focal.pth \ No newline at end of file diff --git a/.history/.gitignore_20251002092259 b/.history/.gitignore_20251002092259 new file mode 100644 index 0000000000000000000000000000000000000000..d74e460d460d9541449bd707bb416032bbc9445e --- /dev/null +++ b/.history/.gitignore_20251002092259 @@ -0,0 +1,12 @@ +/dataset/ +/data/ +/.venv/ +__pycache__/ +outputs/models/best_vit_model_amp.pth +outputs/models/vit_mae_cataract_classifier.pth +logs/ +outputs/models/best_vit_model_balanced.pth +outputs/models/best_swin_model_final.pth +outputs/new_model/best_model_final_TTA.pth +outputs/new_model2/best_model_final_TTA_Focal.pth +outputs \ No newline at end of file diff --git a/.history/.gitignore_20251002092300 b/.history/.gitignore_20251002092300 new file mode 100644 index 0000000000000000000000000000000000000000..f02bc3e57adce4cb398f86a104b05c4ed22996f6 --- /dev/null +++ b/.history/.gitignore_20251002092300 @@ -0,0 +1,12 @@ +/dataset/ +/data/ +/.venv/ +__pycache__/ +outputs/models/best_vit_model_amp.pth 
+outputs/models/vit_mae_cataract_classifier.pth +logs/ +outputs/models/best_vit_model_balanced.pth +outputs/models/best_swin_model_final.pth +outputs/new_model/best_model_final_TTA.pth +outputs/new_model2/best_model_final_TTA_Focal.pth +outputs/ \ No newline at end of file diff --git a/.history/.gitignore_20251002092307 b/.history/.gitignore_20251002092307 new file mode 100644 index 0000000000000000000000000000000000000000..578904c667b2c342da01c063991c19ab60be33dc --- /dev/null +++ b/.history/.gitignore_20251002092307 @@ -0,0 +1,12 @@ +/dataset/ +/data/ +/.venv/ +__pycache__/ +outputs/models/best_vit_model_amp.pth +outputs/models/vit_mae_cataract_classifier.pth +logs/ +outputs/models/best_vit_model_balanced.pth +outputs/models/best_swin_model_final.pth +outputs/new_model/best_model_final_TTA.pth +outputs/new_model2/best_model_final_TTA_Focal.pth +outputs/new_model/ \ No newline at end of file diff --git a/.history/.gitignore_20251002092316 b/.history/.gitignore_20251002092316 new file mode 100644 index 0000000000000000000000000000000000000000..e4be6faf97d8b0c9d2be8c486aa56dcc76b7b208 --- /dev/null +++ b/.history/.gitignore_20251002092316 @@ -0,0 +1,12 @@ +/dataset/ +/data/ +/.venv/ +__pycache__/ +outputs/models/best_vit_model_amp.pth +outputs/models/vit_mae_cataract_classifier.pth +logs/ +outputs/models/best_vit_model_balanced.pth +outputs/models/best_swin_model_final.pth +outputs/new_model/best_model_final_TTA.pth +outputs/new_model2/best_model_final_TTA_Focal.pth +outputs/new_model/best_model_final_TTA.pth \ No newline at end of file diff --git a/.history/.gitignore_20251002092324 b/.history/.gitignore_20251002092324 new file mode 100644 index 0000000000000000000000000000000000000000..d4266b664eb3959aa9761d6ae09e723f9013f0df --- /dev/null +++ b/.history/.gitignore_20251002092324 @@ -0,0 +1,12 @@ +/dataset/ +/data/ +/.venv/ +__pycache__/ +outputs/models/best_vit_model_amp.pth +outputs/models/vit_mae_cataract_classifier.pth +logs/ 
+outputs/models/best_vit_model_balanced.pth +outputs/models/best_swin_model_final.pth +outputs/new_model/best_model_final_TTA.pth +outputs/new_model2/best_model_final_TTA_Focal.pth +outputs/new_model/best_model_final_TTA+.pth \ No newline at end of file diff --git a/.history/.gitignore_20251002092330 b/.history/.gitignore_20251002092330 new file mode 100644 index 0000000000000000000000000000000000000000..e41f2808a82e980b5a600a1c7dacaef1edce6b50 --- /dev/null +++ b/.history/.gitignore_20251002092330 @@ -0,0 +1,12 @@ +/dataset/ +/data/ +/.venv/ +__pycache__/ +outputs/models/best_vit_model_amp.pth +outputs/models/vit_mae_cataract_classifier.pth +logs/ +outputs/models/best_vit_model_balanced.pth +outputs/models/best_swin_model_final.pth +outputs/new_model/best_model_final_TTA.pth +outputs/new_model2/best_model_final_TTA_Focal.pth +outputs/new_model/best_model_final_TTA_Focal.pth \ No newline at end of file diff --git a/.history/.gitignore_20251004171810 b/.history/.gitignore_20251004171810 new file mode 100644 index 0000000000000000000000000000000000000000..88e429967233adc7db1c54ea5e5fd78705b87403 --- /dev/null +++ b/.history/.gitignore_20251004171810 @@ -0,0 +1,13 @@ +/dataset/ +/data/ +/.venv/ +__pycache__/ +outputs/models/best_vit_model_amp.pth +outputs/models/vit_mae_cataract_classifier.pth +logs/ +outputs/models/best_vit_model_balanced.pth +outputs/models/best_swin_model_final.pth +outputs/new_model/best_model_final_TTA.pth +outputs/new_model2/best_model_final_TTA_Focal.pth +outputs/new_model/best_model_final_TTA_Focal.pth +C:\Users\user\Documents\Project\Cataract-ViT\outputs\models\best_swin_weights_only.pth \ No newline at end of file diff --git a/.history/.gitignore_20251004171813 b/.history/.gitignore_20251004171813 new file mode 100644 index 0000000000000000000000000000000000000000..f2dbb700dba0601b34358c01189b60b9800d74ee --- /dev/null +++ b/.history/.gitignore_20251004171813 @@ -0,0 +1,13 @@ +/dataset/ +/data/ +/.venv/ +__pycache__/ 
+outputs/models/best_vit_model_amp.pth +outputs/models/vit_mae_cataract_classifier.pth +logs/ +outputs/models/best_vit_model_balanced.pth +outputs/models/best_swin_model_final.pth +outputs/new_model/best_model_final_TTA.pth +outputs/new_model2/best_model_final_TTA_Focal.pth +outputs/new_model/best_model_final_TTA_Focal.pth +outputs\models\best_swin_weights_only.pth \ No newline at end of file diff --git a/.history/.gitignore_20251004171816 b/.history/.gitignore_20251004171816 new file mode 100644 index 0000000000000000000000000000000000000000..a10e65c12bfe70e0114a116c32abcff5cecaaaaf --- /dev/null +++ b/.history/.gitignore_20251004171816 @@ -0,0 +1,13 @@ +/dataset/ +/data/ +/.venv/ +__pycache__/ +outputs/models/best_vit_model_amp.pth +outputs/models/vit_mae_cataract_classifier.pth +logs/ +outputs/models/best_vit_model_balanced.pth +outputs/models/best_swin_model_final.pth +outputs/new_model/best_model_final_TTA.pth +outputs/new_model2/best_model_final_TTA_Focal.pth +outputs/new_model/best_model_final_TTA_Focal.pth +outputs/models\best_swin_weights_only.pth \ No newline at end of file diff --git a/.history/.gitignore_20251004171818 b/.history/.gitignore_20251004171818 new file mode 100644 index 0000000000000000000000000000000000000000..16fc67aeb5a4d518bfb10508b582eec8af35f797 --- /dev/null +++ b/.history/.gitignore_20251004171818 @@ -0,0 +1,13 @@ +/dataset/ +/data/ +/.venv/ +__pycache__/ +outputs/models/best_vit_model_amp.pth +outputs/models/vit_mae_cataract_classifier.pth +logs/ +outputs/models/best_vit_model_balanced.pth +outputs/models/best_swin_model_final.pth +outputs/new_model/best_model_final_TTA.pth +outputs/new_model2/best_model_final_TTA_Focal.pth +outputs/new_model/best_model_final_TTA_Focal.pth +outputs/models/best_swin_weights_only.pth \ No newline at end of file diff --git a/.history/README_20250926210923.md b/.history/README_20250926210923.md new file mode 100644 index 0000000000000000000000000000000000000000..1d20b3128ca6a962d74bb97866a243dad18bcdad 
Binary files /dev/null and b/.history/README_20250926210923.md differ diff --git a/.history/README_20251010182452.md b/.history/README_20251010182452.md new file mode 100644 index 0000000000000000000000000000000000000000..e0d2773ae1c184ab2c6984f3f27665e033fb0208 Binary files /dev/null and b/.history/README_20251010182452.md differ diff --git a/.history/README_20251010182453.md b/.history/README_20251010182453.md new file mode 100644 index 0000000000000000000000000000000000000000..3d0b1c5bab179bea49f0cdc63628e7e879c60738 Binary files /dev/null and b/.history/README_20251010182453.md differ diff --git a/.history/README_20251010182456.md b/.history/README_20251010182456.md new file mode 100644 index 0000000000000000000000000000000000000000..3d0b1c5bab179bea49f0cdc63628e7e879c60738 Binary files /dev/null and b/.history/README_20251010182456.md differ diff --git a/.history/README_20251010182458.md b/.history/README_20251010182458.md new file mode 100644 index 0000000000000000000000000000000000000000..25f3a4f743ba479caa436f7fae9023748260e0b5 Binary files /dev/null and b/.history/README_20251010182458.md differ diff --git a/.history/README_20251010182459.md b/.history/README_20251010182459.md new file mode 100644 index 0000000000000000000000000000000000000000..3524d30c9864954fa25d83b0379e1aff9f108b4b Binary files /dev/null and b/.history/README_20251010182459.md differ diff --git a/.history/README_20251010182509.md b/.history/README_20251010182509.md new file mode 100644 index 0000000000000000000000000000000000000000..7d170da88f5059dc62307b8ec994fc7737418f94 Binary files /dev/null and b/.history/README_20251010182509.md differ diff --git a/.history/README_20251010182511.md b/.history/README_20251010182511.md new file mode 100644 index 0000000000000000000000000000000000000000..3524d30c9864954fa25d83b0379e1aff9f108b4b Binary files /dev/null and b/.history/README_20251010182511.md differ diff --git a/.history/README_20251010182515.md b/.history/README_20251010182515.md new 
file mode 100644 index 0000000000000000000000000000000000000000..7d170da88f5059dc62307b8ec994fc7737418f94 Binary files /dev/null and b/.history/README_20251010182515.md differ diff --git a/.history/README_20251010182522.md b/.history/README_20251010182522.md new file mode 100644 index 0000000000000000000000000000000000000000..46b134b197f35e75e0784bedbf94a8dd124693b1 --- /dev/null +++ b/.history/README_20251010182522.md @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/.history/README_20251010182523.md b/.history/README_20251010182523.md new file mode 100644 index 0000000000000000000000000000000000000000..fc461671990e64471ffd1bf527bd81bd3eabedaa Binary files /dev/null and b/.history/README_20251010182523.md differ diff --git a/.history/README_20251010182530.md b/.history/README_20251010182530.md new file mode 100644 index 0000000000000000000000000000000000000000..30399597dd1ed5f52770de4334aaa518cf698fc5 Binary files /dev/null and b/.history/README_20251010182530.md differ diff --git a/.history/README_20251010182543.md b/.history/README_20251010182543.md new file mode 100644 index 0000000000000000000000000000000000000000..f9c2303e7f41f4c753667568e19e1a020a64b72c Binary files /dev/null and b/.history/README_20251010182543.md differ diff --git a/.history/README_20251010182554.md b/.history/README_20251010182554.md new file mode 100644 index 0000000000000000000000000000000000000000..3d0b1c5bab179bea49f0cdc63628e7e879c60738 Binary files /dev/null and b/.history/README_20251010182554.md differ diff --git a/.history/README_20251010182557.md b/.history/README_20251010182557.md new file mode 100644 index 0000000000000000000000000000000000000000..a8baf89bedd629b78914b2fad39904bc62f276cb Binary files /dev/null and b/.history/README_20251010182557.md differ diff --git a/.history/README_20251010182608.md b/.history/README_20251010182608.md new file mode 100644 index 0000000000000000000000000000000000000000..cfddd2dad1ecca9fd61b97b9248fa55e3c780bef Binary files /dev/null 
and b/.history/README_20251010182608.md differ diff --git a/.history/README_20251010182612.md b/.history/README_20251010182612.md new file mode 100644 index 0000000000000000000000000000000000000000..9cea8c6c9d8c25efcc15962411e0f692d2c00abe Binary files /dev/null and b/.history/README_20251010182612.md differ diff --git a/.history/README_20251010182620.md b/.history/README_20251010182620.md new file mode 100644 index 0000000000000000000000000000000000000000..4fc863d7c30df73be2228d6a9968ba46048be0d0 Binary files /dev/null and b/.history/README_20251010182620.md differ diff --git a/.history/README_20251010182622.md b/.history/README_20251010182622.md new file mode 100644 index 0000000000000000000000000000000000000000..c29f80d8549718b4ece45b576747e895cde91697 Binary files /dev/null and b/.history/README_20251010182622.md differ diff --git a/.history/README_20251010182630.md b/.history/README_20251010182630.md new file mode 100644 index 0000000000000000000000000000000000000000..2f1cf7ba63e3a7c5235284c72fcf375a2012ef89 Binary files /dev/null and b/.history/README_20251010182630.md differ diff --git a/.history/README_20251010182634.md b/.history/README_20251010182634.md new file mode 100644 index 0000000000000000000000000000000000000000..0854ffb5caf851c9bffec4c7a848261d421a011a Binary files /dev/null and b/.history/README_20251010182634.md differ diff --git a/.history/README_20251010182643.md b/.history/README_20251010182643.md new file mode 100644 index 0000000000000000000000000000000000000000..f6abf81bc769b2db87988cebd24dbafe2377c30b Binary files /dev/null and b/.history/README_20251010182643.md differ diff --git a/.history/README_20251010182649.md b/.history/README_20251010182649.md new file mode 100644 index 0000000000000000000000000000000000000000..0330c63f1a5d895862e74529f227a9fdb9fb9949 Binary files /dev/null and b/.history/README_20251010182649.md differ diff --git a/.history/README_20251010182657.md b/.history/README_20251010182657.md new file mode 100644 index 
0000000000000000000000000000000000000000..ed0a80e2ac9a47be28a9cae8171a9d5428a1fa5c Binary files /dev/null and b/.history/README_20251010182657.md differ diff --git a/.history/README_20251010182658.md b/.history/README_20251010182658.md new file mode 100644 index 0000000000000000000000000000000000000000..f6db07866af0c1756ce9e27fe0f1b9d8238ba7fa Binary files /dev/null and b/.history/README_20251010182658.md differ diff --git a/.history/README_20251010182700.md b/.history/README_20251010182700.md new file mode 100644 index 0000000000000000000000000000000000000000..41c1c9b82b8af12daf0866aa2aa69b59a86b9bff Binary files /dev/null and b/.history/README_20251010182700.md differ diff --git a/.history/README_20251010182703.md b/.history/README_20251010182703.md new file mode 100644 index 0000000000000000000000000000000000000000..715ed0c24ba57ba77c3ca40f7527c6ab94e7329c Binary files /dev/null and b/.history/README_20251010182703.md differ diff --git a/.history/README_20251010183711.md b/.history/README_20251010183711.md new file mode 100644 index 0000000000000000000000000000000000000000..715ed0c24ba57ba77c3ca40f7527c6ab94e7329c Binary files /dev/null and b/.history/README_20251010183711.md differ diff --git a/.history/README_20251010183712.md b/.history/README_20251010183712.md new file mode 100644 index 0000000000000000000000000000000000000000..715ed0c24ba57ba77c3ca40f7527c6ab94e7329c Binary files /dev/null and b/.history/README_20251010183712.md differ diff --git a/.history/README_20251010183725.md b/.history/README_20251010183725.md new file mode 100644 index 0000000000000000000000000000000000000000..715ed0c24ba57ba77c3ca40f7527c6ab94e7329c Binary files /dev/null and b/.history/README_20251010183725.md differ diff --git a/.history/README_20251010183726.md b/.history/README_20251010183726.md new file mode 100644 index 0000000000000000000000000000000000000000..715ed0c24ba57ba77c3ca40f7527c6ab94e7329c Binary files /dev/null and b/.history/README_20251010183726.md differ diff 
--git a/.history/README_20251010183727.md b/.history/README_20251010183727.md new file mode 100644 index 0000000000000000000000000000000000000000..715ed0c24ba57ba77c3ca40f7527c6ab94e7329c Binary files /dev/null and b/.history/README_20251010183727.md differ diff --git a/.history/README_20251010184447.md b/.history/README_20251010184447.md new file mode 100644 index 0000000000000000000000000000000000000000..2fc876099a807a45004ee869ea2eeb6c96c83612 Binary files /dev/null and b/.history/README_20251010184447.md differ diff --git a/.history/README_20251010184555.md b/.history/README_20251010184555.md new file mode 100644 index 0000000000000000000000000000000000000000..120b2359ab1922e6d65098befae98023f13818bd Binary files /dev/null and b/.history/README_20251010184555.md differ diff --git a/.history/README_20251010184604.md b/.history/README_20251010184604.md new file mode 100644 index 0000000000000000000000000000000000000000..d90b8a25de62ce10989110f6006db58a22561dad Binary files /dev/null and b/.history/README_20251010184604.md differ diff --git a/.history/README_20251010184757.md b/.history/README_20251010184757.md new file mode 100644 index 0000000000000000000000000000000000000000..c0c8c58f2be29a71b8d0106c295bd59f6ded213f Binary files /dev/null and b/.history/README_20251010184757.md differ diff --git a/.history/requirements_20250923122350.txt b/.history/requirements_20250923122350.txt new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/.history/requirements_20250923122851.txt b/.history/requirements_20250923122851.txt new file mode 100644 index 0000000000000000000000000000000000000000..62e257692a3dab62cbbe35054594a5de6537a16c --- /dev/null +++ b/.history/requirements_20250923122851.txt @@ -0,0 +1,10 @@ +torch +torchvision +torchaudio +timm +scikit-learn +opencv-python +matplotlib +seaborn +albumentations +wandb diff --git a/.history/requirements_20250923122852.txt 
b/.history/requirements_20250923122852.txt new file mode 100644 index 0000000000000000000000000000000000000000..62e257692a3dab62cbbe35054594a5de6537a16c --- /dev/null +++ b/.history/requirements_20250923122852.txt @@ -0,0 +1,10 @@ +torch +torchvision +torchaudio +timm +scikit-learn +opencv-python +matplotlib +seaborn +albumentations +wandb diff --git a/.history/src/__init___20250923122539.py b/.history/src/__init___20250923122539.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/.history/src/__init___20250923122631.py b/.history/src/__init___20250923122631.py new file mode 100644 index 0000000000000000000000000000000000000000..ef114c73a16067a0152e5206e0e75f2a08c4ef12 --- /dev/null +++ b/.history/src/__init___20250923122631.py @@ -0,0 +1,7 @@ +# src/__init__.py + +# Biar bisa langsung import fungsi/kelas dari dataset dan model +from .dataset import CustomDataset +from .model import get_vit_model + +__all__ = ["CustomDataset", "get_vit_model"] diff --git a/.history/src/convert_to_state_dict_20251004155903.py b/.history/src/convert_to_state_dict_20251004155903.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/.history/src/convert_to_state_dict_20251004155921.py b/.history/src/convert_to_state_dict_20251004155921.py new file mode 100644 index 0000000000000000000000000000000000000000..462773ae37e395b14543d2c712eda9084646ae04 --- /dev/null +++ b/.history/src/convert_to_state_dict_20251004155921.py @@ -0,0 +1,24 @@ +import torch + +# Path model lama (yang masih berisi objek model penuh) +old_model_path = r"C:\Users\user\Documents\Project\Cataract-ViT\outputs\models\best_swin_model_final.pth" + +# Path output baru (hanya berisi bobot/weight) +new_model_path = r"C:\Users\user\Documents\Project\Cataract-ViT\outputs\models\best_swin_weights_only.pth" + +# Load model lama (pastikan ini aman karena model dari kamu sendiri) 
+model = torch.load(old_model_path, map_location="cpu") + +# Jika model disimpan sebagai dictionary (misal {'model': ..., 'optimizer': ...}) +# kita ambil bagian state_dict-nya +if isinstance(model, dict) and "state_dict" in model: + state_dict = model["state_dict"] +elif hasattr(model, "state_dict"): + state_dict = model.state_dict() +else: + raise ValueError("File tidak berisi objek model yang bisa diambil state_dict-nya") + +# Simpan ulang hanya state_dict +torch.save(state_dict, new_model_path) + +print(f"✅ State dict berhasil disimpan ulang ke:\n{new_model_path}") diff --git a/.history/src/convert_to_state_dict_20251004160001.py b/.history/src/convert_to_state_dict_20251004160001.py new file mode 100644 index 0000000000000000000000000000000000000000..d1ec06a37cb90cec4987f6280554576069428ea7 --- /dev/null +++ b/.history/src/convert_to_state_dict_20251004160001.py @@ -0,0 +1,24 @@ +import torch + +# Path model lama (yang masih berisi objek model penuh) +old_model_path = r"outputs\models\best_swin_model_final.pth" + +# Path output baru (hanya berisi bobot/weight) +new_model_path = r"C:\Users\user\Documents\Project\Cataract-ViT\outputs\models\best_swin_weights_only.pth" + +# Load model lama (pastikan ini aman karena model dari kamu sendiri) +model = torch.load(old_model_path, map_location="cpu") + +# Jika model disimpan sebagai dictionary (misal {'model': ..., 'optimizer': ...}) +# kita ambil bagian state_dict-nya +if isinstance(model, dict) and "state_dict" in model: + state_dict = model["state_dict"] +elif hasattr(model, "state_dict"): + state_dict = model.state_dict() +else: + raise ValueError("File tidak berisi objek model yang bisa diambil state_dict-nya") + +# Simpan ulang hanya state_dict +torch.save(state_dict, new_model_path) + +print(f"✅ State dict berhasil disimpan ulang ke:\n{new_model_path}") diff --git a/.history/src/convert_to_state_dict_20251004160003.py b/.history/src/convert_to_state_dict_20251004160003.py new file mode 100644 index 
0000000000000000000000000000000000000000..2a97ea3060344f276c6cf5848cb334cd8213ae0d --- /dev/null +++ b/.history/src/convert_to_state_dict_20251004160003.py @@ -0,0 +1,24 @@ +import torch + +# Path model lama (yang masih berisi objek model penuh) +old_model_path = r"outputs/models\best_swin_model_final.pth" + +# Path output baru (hanya berisi bobot/weight) +new_model_path = r"C:\Users\user\Documents\Project\Cataract-ViT\outputs\models\best_swin_weights_only.pth" + +# Load model lama (pastikan ini aman karena model dari kamu sendiri) +model = torch.load(old_model_path, map_location="cpu") + +# Jika model disimpan sebagai dictionary (misal {'model': ..., 'optimizer': ...}) +# kita ambil bagian state_dict-nya +if isinstance(model, dict) and "state_dict" in model: + state_dict = model["state_dict"] +elif hasattr(model, "state_dict"): + state_dict = model.state_dict() +else: + raise ValueError("File tidak berisi objek model yang bisa diambil state_dict-nya") + +# Simpan ulang hanya state_dict +torch.save(state_dict, new_model_path) + +print(f"✅ State dict berhasil disimpan ulang ke:\n{new_model_path}") diff --git a/.history/src/convert_to_state_dict_20251004160006.py b/.history/src/convert_to_state_dict_20251004160006.py new file mode 100644 index 0000000000000000000000000000000000000000..76081b1166cabe022de451a906b4fd768a52b972 --- /dev/null +++ b/.history/src/convert_to_state_dict_20251004160006.py @@ -0,0 +1,24 @@ +import torch + +# Path model lama (yang masih berisi objek model penuh) +old_model_path = r"outputs/models/best_swin_model_final.pth" + +# Path output baru (hanya berisi bobot/weight) +new_model_path = r"C:\Users\user\Documents\Project\Cataract-ViT\outputs\models\best_swin_weights_only.pth" + +# Load model lama (pastikan ini aman karena model dari kamu sendiri) +model = torch.load(old_model_path, map_location="cpu") + +# Jika model disimpan sebagai dictionary (misal {'model': ..., 'optimizer': ...}) +# kita ambil bagian state_dict-nya +if isinstance(model, 
dict) and "state_dict" in model: + state_dict = model["state_dict"] +elif hasattr(model, "state_dict"): + state_dict = model.state_dict() +else: + raise ValueError("File tidak berisi objek model yang bisa diambil state_dict-nya") + +# Simpan ulang hanya state_dict +torch.save(state_dict, new_model_path) + +print(f"✅ State dict berhasil disimpan ulang ke:\n{new_model_path}") diff --git a/.history/src/convert_to_state_dict_20251004160012.py b/.history/src/convert_to_state_dict_20251004160012.py new file mode 100644 index 0000000000000000000000000000000000000000..7b58108cbb07510a8e3e1a57753bf7960c5b6999 --- /dev/null +++ b/.history/src/convert_to_state_dict_20251004160012.py @@ -0,0 +1,24 @@ +import torch + +# Path model lama (yang masih berisi objek model penuh) +old_model_path = r"outputs/models/best_swin_model_final.pth" + +# Path output baru (hanya berisi bobot/weight) +new_model_path = r"outputs\models\best_swin_weights_only.pth" + +# Load model lama (pastikan ini aman karena model dari kamu sendiri) +model = torch.load(old_model_path, map_location="cpu") + +# Jika model disimpan sebagai dictionary (misal {'model': ..., 'optimizer': ...}) +# kita ambil bagian state_dict-nya +if isinstance(model, dict) and "state_dict" in model: + state_dict = model["state_dict"] +elif hasattr(model, "state_dict"): + state_dict = model.state_dict() +else: + raise ValueError("File tidak berisi objek model yang bisa diambil state_dict-nya") + +# Simpan ulang hanya state_dict +torch.save(state_dict, new_model_path) + +print(f"✅ State dict berhasil disimpan ulang ke:\n{new_model_path}") diff --git a/.history/src/convert_to_state_dict_20251004160021.py b/.history/src/convert_to_state_dict_20251004160021.py new file mode 100644 index 0000000000000000000000000000000000000000..db41f03db60d878c75a4d13f88bc6b4424f9db18 --- /dev/null +++ b/.history/src/convert_to_state_dict_20251004160021.py @@ -0,0 +1,24 @@ +import torch + +# Path model lama (yang masih berisi objek model penuh) 
+old_model_path = r"outputsmodels/best_swin_model_final.pth" + +# Path output baru (hanya berisi bobot/weight) +new_model_path = r"outputs\models\best_swin_weights_only.pth" + +# Load model lama (pastikan ini aman karena model dari kamu sendiri) +model = torch.load(old_model_path, map_location="cpu") + +# Jika model disimpan sebagai dictionary (misal {'model': ..., 'optimizer': ...}) +# kita ambil bagian state_dict-nya +if isinstance(model, dict) and "state_dict" in model: + state_dict = model["state_dict"] +elif hasattr(model, "state_dict"): + state_dict = model.state_dict() +else: + raise ValueError("File tidak berisi objek model yang bisa diambil state_dict-nya") + +# Simpan ulang hanya state_dict +torch.save(state_dict, new_model_path) + +print(f"✅ State dict berhasil disimpan ulang ke:\n{new_model_path}") diff --git a/.history/src/convert_to_state_dict_20251004160510.py b/.history/src/convert_to_state_dict_20251004160510.py new file mode 100644 index 0000000000000000000000000000000000000000..462773ae37e395b14543d2c712eda9084646ae04 --- /dev/null +++ b/.history/src/convert_to_state_dict_20251004160510.py @@ -0,0 +1,24 @@ +import torch + +# Path model lama (yang masih berisi objek model penuh) +old_model_path = r"C:\Users\user\Documents\Project\Cataract-ViT\outputs\models\best_swin_model_final.pth" + +# Path output baru (hanya berisi bobot/weight) +new_model_path = r"C:\Users\user\Documents\Project\Cataract-ViT\outputs\models\best_swin_weights_only.pth" + +# Load model lama (pastikan ini aman karena model dari kamu sendiri) +model = torch.load(old_model_path, map_location="cpu") + +# Jika model disimpan sebagai dictionary (misal {'model': ..., 'optimizer': ...}) +# kita ambil bagian state_dict-nya +if isinstance(model, dict) and "state_dict" in model: + state_dict = model["state_dict"] +elif hasattr(model, "state_dict"): + state_dict = model.state_dict() +else: + raise ValueError("File tidak berisi objek model yang bisa diambil state_dict-nya") + +# Simpan ulang 
hanya state_dict +torch.save(state_dict, new_model_path) + +print(f"✅ State dict berhasil disimpan ulang ke:\n{new_model_path}") diff --git a/.history/src/convert_to_state_dict_20251004160739.py b/.history/src/convert_to_state_dict_20251004160739.py new file mode 100644 index 0000000000000000000000000000000000000000..51b9ec97e8beb3d82a60fdf0b22c55e043742b3d --- /dev/null +++ b/.history/src/convert_to_state_dict_20251004160739.py @@ -0,0 +1,24 @@ +import torch + +# Path model lama (yang masih berisi objek model penuh) +old_model_path = r"C:\Users\user\Documents\Project\Cataract-ViT\outputs\models\best_swin_model_final.pth" + +# Path output baru (hanya berisi bobot/weight) +new_model_path = r"C:\Users\user\Documents\Project\Cataract-ViT\outputs\models\best_swin_weights_only.pth" + +# Load model lama (pastikan ini aman karena model dari kamu sendiri) +model = torch.load(old_model_path, map_location="cpu", weights_only=False) + +# Jika model disimpan sebagai dictionary (misal {'model': ..., 'optimizer': ...}) +# kita ambil bagian state_dict-nya +if isinstance(model, dict) and "state_dict" in model: + state_dict = model["state_dict"] +elif hasattr(model, "state_dict"): + state_dict = model.state_dict() +else: + raise ValueError("File tidak berisi objek model yang bisa diambil state_dict-nya") + +# Simpan ulang hanya state_dict +torch.save(state_dict, new_model_path) + +print(f"✅ State dict berhasil disimpan ulang ke:\n{new_model_path}") diff --git a/.history/src/convert_to_state_dict_20251004160757.py b/.history/src/convert_to_state_dict_20251004160757.py new file mode 100644 index 0000000000000000000000000000000000000000..9d24a219b2d3de7af7a1bfa3bfef7d7872335fa6 --- /dev/null +++ b/.history/src/convert_to_state_dict_20251004160757.py @@ -0,0 +1,27 @@ +import torch + +# Path model lama (yang masih berisi objek model penuh) +old_model_path = r"C:\Users\user\Documents\Project\Cataract-ViT\outputs\models\best_swin_model_final.pth" + +# Path output baru (hanya berisi 
bobot/weight) +new_model_path = r"C:\Users\user\Documents\Project\Cataract-ViT\outputs\models\best_swin_weights_only.pth" + +# Tambahkan ke daftar yang aman +add_safe_globals([CrossEntropyLoss]) + +# Load model lama (pastikan ini aman karena model dari kamu sendiri) +model = torch.load(old_model_path, map_location="cpu", weights_only=False) + +# Jika model disimpan sebagai dictionary (misal {'model': ..., 'optimizer': ...}) +# kita ambil bagian state_dict-nya +if isinstance(model, dict) and "state_dict" in model: + state_dict = model["state_dict"] +elif hasattr(model, "state_dict"): + state_dict = model.state_dict() +else: + raise ValueError("File tidak berisi objek model yang bisa diambil state_dict-nya") + +# Simpan ulang hanya state_dict +torch.save(state_dict, new_model_path) + +print(f"✅ State dict berhasil disimpan ulang ke:\n{new_model_path}") diff --git a/.history/src/convert_to_state_dict_20251004160810.py b/.history/src/convert_to_state_dict_20251004160810.py new file mode 100644 index 0000000000000000000000000000000000000000..4a981ccd82bb7624592db9f609d3469c022cd79b --- /dev/null +++ b/.history/src/convert_to_state_dict_20251004160810.py @@ -0,0 +1,29 @@ +import torch +from torch.serialization import add_safe_globals +from torch.nn.modules.loss import CrossEntropyLoss + +# Path model lama (yang masih berisi objek model penuh) +old_model_path = r"C:\Users\user\Documents\Project\Cataract-ViT\outputs\models\best_swin_model_final.pth" + +# Path output baru (hanya berisi bobot/weight) +new_model_path = r"C:\Users\user\Documents\Project\Cataract-ViT\outputs\models\best_swin_weights_only.pth" + +# Tambahkan ke daftar yang aman +add_safe_globals([CrossEntropyLoss]) + +# Load model lama (pastikan ini aman karena model dari kamu sendiri) +model = torch.load(old_model_path, map_location="cpu", weights_only=False) + +# Jika model disimpan sebagai dictionary (misal {'model': ..., 'optimizer': ...}) +# kita ambil bagian state_dict-nya +if isinstance(model, dict) and 
"state_dict" in model: + state_dict = model["state_dict"] +elif hasattr(model, "state_dict"): + state_dict = model.state_dict() +else: + raise ValueError("File tidak berisi objek model yang bisa diambil state_dict-nya") + +# Simpan ulang hanya state_dict +torch.save(state_dict, new_model_path) + +print(f"✅ State dict berhasil disimpan ulang ke:\n{new_model_path}") diff --git a/.history/src/convert_to_state_dict_20251004160857.py b/.history/src/convert_to_state_dict_20251004160857.py new file mode 100644 index 0000000000000000000000000000000000000000..7329ee07caa4d7c27cd93fe63c56b935d11c4b99 --- /dev/null +++ b/.history/src/convert_to_state_dict_20251004160857.py @@ -0,0 +1,34 @@ +import torch +from torch.serialization import add_safe_globals +from torch.nn.modules.loss import CrossEntropyLoss + +# Tambahkan whitelist agar bisa load aman +add_safe_globals([CrossEntropyLoss]) + +old_model_path = r"C:\Users\user\Documents\Project\Cataract-ViT\outputs\models\best_swin_model_final.pth" +new_model_path = r"C:\Users\user\Documents\Project\Cataract-ViT\outputs\models\best_swin_weights_only.pth" + +# Load dengan weights_only=False karena ini file trusted +checkpoint = torch.load(old_model_path, map_location="cpu", weights_only=False) + +# Deteksi isi file +if isinstance(checkpoint, dict): + # Coba ambil beberapa kemungkinan key umum + if "state_dict" in checkpoint: + state_dict = checkpoint["state_dict"] + elif "model_state_dict" in checkpoint: + state_dict = checkpoint["model_state_dict"] + elif "model" in checkpoint and hasattr(checkpoint["model"], "state_dict"): + state_dict = checkpoint["model"].state_dict() + else: + raise ValueError(f"❌ Tidak ditemukan key state_dict dalam checkpoint: {checkpoint.keys()}") +elif hasattr(checkpoint, "state_dict"): + # Kalau model langsung + state_dict = checkpoint.state_dict() +else: + raise ValueError("❌ File tidak berisi model atau dictionary dengan state_dict yang valid.") + +# Simpan ulang hanya weight-nya +torch.save(state_dict, 
new_model_path) + +print(f"✅ State dict berhasil diekstrak dan disimpan ke:\n{new_model_path}") diff --git a/.history/src/dataset_20250923122211.py b/.history/src/dataset_20250923122211.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/.history/src/dataset_20250923122709.py b/.history/src/dataset_20250923122709.py new file mode 100644 index 0000000000000000000000000000000000000000..d35dd5caf31b4b469ce5409fff944d3879d26705 --- /dev/null +++ b/.history/src/dataset_20250923122709.py @@ -0,0 +1,8 @@ +# src/dataset.py + +class CustomDataset: + def __init__(self, data_path): + self.data_path = data_path + + def __len__(self): + return 0 # nanti diganti sesuai dataset diff --git a/.history/src/dataset_20250923143501.py b/.history/src/dataset_20250923143501.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/.history/src/dataset_20250923143553.py b/.history/src/dataset_20250923143553.py new file mode 100644 index 0000000000000000000000000000000000000000..b5e00031bb7899e99a34cfd8df9dd5af3f301806 --- /dev/null +++ b/.history/src/dataset_20250923143553.py @@ -0,0 +1,74 @@ +# src/dataset.py + +import torch +from torchvision import datasets, transforms +from torch.utils.data import DataLoader + +def get_transforms(image_size): + """ + Mendefinisikan pipeline augmentasi dan transformasi data. + Sesuai dengan metodologi: rotasi, flip, brightness, contrast, dll. 
+ """ + # Augmentasi untuk data training untuk membuat model lebih robust + train_transform = transforms.Compose([ + transforms.Resize((image_size, image_size)), + transforms.RandomHorizontalFlip(p=0.5), + transforms.RandomRotation(35), + transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2), + transforms.ToTensor(), + # Normalisasi menggunakan mean dan std dari ImageNet (praktik standar) + transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) + ]) + + # Untuk data validasi, kita tidak melakukan augmentasi, hanya resize dan normalisasi + valid_transform = transforms.Compose([ + transforms.Resize((image_size, image_size)), + transforms.ToTensor(), + transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) + ]) + + return train_transform, valid_transform + +def get_dataloaders(data_dir, batch_size, image_size): + """ + Membuat DataLoaders untuk training dan validasi. + + Args: + data_dir (str): Path ke direktori data ('../data'). + batch_size (int): Ukuran batch. + image_size (int): Ukuran gambar (misal: 224). + + Returns: + train_loader, valid_loader, dataset_classes + """ + train_transform, valid_transform = get_transforms(image_size) + + # Membuat dataset dari folder yang sudah di-split + # datasets.ImageFolder secara otomatis menemukan kelas dari nama subfolder + train_dataset = datasets.ImageFolder(f"{data_dir}/train", transform=train_transform) + valid_dataset = datasets.ImageFolder(f"{data_dir}/valid", transform=valid_transform) + + # Membuat DataLoader + # **PENTING**: num_workers di sini untuk mengatasi masalah laptop macet. + # Jika laptop Anda lambat/macet, ubah num_workers menjadi 2, 1, atau 0. 
+ train_loader = DataLoader( + train_dataset, + batch_size=batch_size, + shuffle=True, + num_workers=2, # Turunkan nilai ini jika RAM Anda terbatas + pin_memory=True + ) + valid_loader = DataLoader( + valid_dataset, + batch_size=batch_size, + shuffle=False, + num_workers=2, # Turunkan nilai ini jika RAM Anda terbatas + pin_memory=True + ) + + # Mendapatkan nama kelas dari dataset + dataset_classes = train_dataset.classes + + print(f"Data berhasil dimuat. Ditemukan {len(dataset_classes)} kelas: {dataset_classes}") + + return train_loader, valid_loader, dataset_classes \ No newline at end of file diff --git a/.history/src/dataset_20250925122222.py b/.history/src/dataset_20250925122222.py new file mode 100644 index 0000000000000000000000000000000000000000..f60bc39bd1321697a25631d5b36699354e0d4a9c --- /dev/null +++ b/.history/src/dataset_20250925122222.py @@ -0,0 +1,63 @@ +# src/dataset.py + +import torch +from torchvision import datasets, transforms +from torch.utils.data import DataLoader + +def get_transforms(image_size): + """ + Mendefinisikan pipeline augmentasi dan transformasi data. + Versi ini menggunakan augmentasi yang lebih kuat untuk mengurangi overfitting. 
+ """ + # Augmentasi agresif untuk data training + train_transform = transforms.Compose([ + transforms.Resize((image_size, image_size)), + # Menambahkan augmentasi baru & memperkuat yang lama + transforms.RandomHorizontalFlip(p=0.5), + transforms.RandomVerticalFlip(p=0.5), + transforms.RandomRotation(45), + transforms.ColorJitter(brightness=0.3, contrast=0.3, saturation=0.3, hue=0.1), + transforms.RandomResizedCrop(image_size, scale=(0.8, 1.0), ratio=(0.9, 1.1)), + transforms.ToTensor(), + transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) + ]) + + # Untuk data validasi, tidak ada augmentasi + valid_transform = transforms.Compose([ + transforms.Resize((image_size, image_size)), + transforms.ToTensor(), + transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) + ]) + + return train_transform, valid_transform + +def get_dataloaders(data_dir, batch_size, image_size): + """ + Membuat DataLoaders untuk training dan validasi. + """ + train_transform, valid_transform = get_transforms(image_size) + + train_dataset = datasets.ImageFolder(f"{data_dir}/train", transform=train_transform) + valid_dataset = datasets.ImageFolder(f"{data_dir}/valid", transform=valid_transform) + + # Turunkan num_workers jika RAM Anda terbatas + train_loader = DataLoader( + train_dataset, + batch_size=batch_size, + shuffle=True, + num_workers=2, + pin_memory=True + ) + valid_loader = DataLoader( + valid_dataset, + batch_size=batch_size, + shuffle=False, + num_workers=2, + pin_memory=True + ) + + dataset_classes = train_dataset.classes + + print(f"Data berhasil dimuat. 
Ditemukan {len(dataset_classes)} kelas: {dataset_classes}") + + return train_loader, valid_loader, dataset_classes \ No newline at end of file diff --git a/.history/src/dataset_20250926073706.py b/.history/src/dataset_20250926073706.py new file mode 100644 index 0000000000000000000000000000000000000000..f8281111031f06b9bdd1b3d3e2dda85f806e1822 --- /dev/null +++ b/.history/src/dataset_20250926073706.py @@ -0,0 +1,63 @@ +# src/dataset.py + +import torch +from torchvision import datasets, transforms +from torch.utils.data import DataLoader + +def get_transforms(image_size): + """ + Mendefinisikan pipeline augmentasi dan transformasi data. + Versi ini menggunakan augmentasi yang lebih kuat untuk mengurangi overfitting. + """ + # Augmentasi agresif untuk data training + train_transform = transforms.Compose([ + transforms.Resize((image_size, image_size)), + # Menambahkan augmentasi baru & memperkuat yang lama + transforms.RandomHorizontalFlip(p=0.5), + transforms.RandomVerticalFlip(p=0.5), + transforms.RandomRotation(45), + transforms.ColorJitter(brightness=0.3, contrast=0.3, saturation=0.3, hue=0.1), + transforms.RandomResizedCrop(image_size, scale=(0.8, 1.0), ratio=(0.9, 1.1)), + transforms.ToTensor(), + transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) + ]) + + # Untuk data validasi, tidak ada augmentasi + valid_transform = transforms.Compose([ + transforms.Resize((image_size, image_size)), + transforms.ToTensor(), + transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) + ]) + + return train_transform, valid_transform + +def get_dataloaders(data_dir, batch_size, image_size): + """ + Membuat DataLoaders untuk training dan validasi. 
+ """ + train_transform, valid_transform = get_transforms(image_size) + + train_dataset = datasets.ImageFolder(f"{data_dir}/train", transform=train_transform) + valid_dataset = datasets.ImageFolder(f"{data_dir}/valid", transform=valid_transform) + + # Turunkan num_workers jika RAM Anda terbatas + train_loader = DataLoader( + train_dataset, + batch_size=batch_size, + shuffle=True, + num_workers=4, + pin_memory=True + ) + valid_loader = DataLoader( + valid_dataset, + batch_size=batch_size, + shuffle=False, + num_workers=2, + pin_memory=True + ) + + dataset_classes = train_dataset.classes + + print(f"Data berhasil dimuat. Ditemukan {len(dataset_classes)} kelas: {dataset_classes}") + + return train_loader, valid_loader, dataset_classes \ No newline at end of file diff --git a/.history/src/dataset_20250926073710.py b/.history/src/dataset_20250926073710.py new file mode 100644 index 0000000000000000000000000000000000000000..4a0156e3b1be1fc17cb989d440809e2a759a35e6 --- /dev/null +++ b/.history/src/dataset_20250926073710.py @@ -0,0 +1,63 @@ +# src/dataset.py + +import torch +from torchvision import datasets, transforms +from torch.utils.data import DataLoader + +def get_transforms(image_size): + """ + Mendefinisikan pipeline augmentasi dan transformasi data. + Versi ini menggunakan augmentasi yang lebih kuat untuk mengurangi overfitting. 
+ """ + # Augmentasi agresif untuk data training + train_transform = transforms.Compose([ + transforms.Resize((image_size, image_size)), + # Menambahkan augmentasi baru & memperkuat yang lama + transforms.RandomHorizontalFlip(p=0.5), + transforms.RandomVerticalFlip(p=0.5), + transforms.RandomRotation(45), + transforms.ColorJitter(brightness=0.3, contrast=0.3, saturation=0.3, hue=0.1), + transforms.RandomResizedCrop(image_size, scale=(0.8, 1.0), ratio=(0.9, 1.1)), + transforms.ToTensor(), + transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) + ]) + + # Untuk data validasi, tidak ada augmentasi + valid_transform = transforms.Compose([ + transforms.Resize((image_size, image_size)), + transforms.ToTensor(), + transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) + ]) + + return train_transform, valid_transform + +def get_dataloaders(data_dir, batch_size, image_size): + """ + Membuat DataLoaders untuk training dan validasi. + """ + train_transform, valid_transform = get_transforms(image_size) + + train_dataset = datasets.ImageFolder(f"{data_dir}/train", transform=train_transform) + valid_dataset = datasets.ImageFolder(f"{data_dir}/valid", transform=valid_transform) + + # Turunkan num_workers jika RAM Anda terbatas + train_loader = DataLoader( + train_dataset, + batch_size=batch_size, + shuffle=True, + num_workers=4, + pin_memory=True + ) + valid_loader = DataLoader( + valid_dataset, + batch_size=batch_size, + shuffle=False, + num_workers=4, + pin_memory=True + ) + + dataset_classes = train_dataset.classes + + print(f"Data berhasil dimuat. 
Ditemukan {len(dataset_classes)} kelas: {dataset_classes}") + + return train_loader, valid_loader, dataset_classes \ No newline at end of file diff --git a/.history/src/dataset_20250926073745.py b/.history/src/dataset_20250926073745.py new file mode 100644 index 0000000000000000000000000000000000000000..ad500f77eae0d636eaa68816f6131538f78afb4a --- /dev/null +++ b/.history/src/dataset_20250926073745.py @@ -0,0 +1,62 @@ +# src/dataset.py + +import torch +from torchvision import datasets, transforms +from torch.utils.data import DataLoader + +def get_transforms(image_size): + """ + Mendefinisikan pipeline augmentasi dan transformasi data. + Versi ini menggunakan augmentasi yang lebih kuat untuk mengurangi overfitting. + """ + # Augmentasi agresif untuk data training + train_transform = transforms.Compose([ + transforms.Resize((image_size, image_size)), + transforms.RandomHorizontalFlip(p=0.5), + transforms.RandomVerticalFlip(p=0.5), + transforms.RandomRotation(45), + transforms.ColorJitter(brightness=0.3, contrast=0.3, saturation=0.3, hue=0.1), + transforms.RandomResizedCrop(image_size, scale=(0.8, 1.0), ratio=(0.9, 1.1)), + transforms.ToTensor(), + transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) + ]) + + # Untuk data validasi, tidak ada augmentasi + valid_transform = transforms.Compose([ + transforms.Resize((image_size, image_size)), + transforms.ToTensor(), + transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) + ]) + + return train_transform, valid_transform + +def get_dataloaders(data_dir, batch_size, image_size): + """ + Membuat DataLoaders untuk training dan validasi. 
+ """ + train_transform, valid_transform = get_transforms(image_size) + + train_dataset = datasets.ImageFolder(f"{data_dir}/train", transform=train_transform) + valid_dataset = datasets.ImageFolder(f"{data_dir}/valid", transform=valid_transform) + + # Turunkan num_workers jika RAM Anda terbatas + train_loader = DataLoader( + train_dataset, + batch_size=batch_size, + shuffle=True, + num_workers=4, + pin_memory=True + ) + valid_loader = DataLoader( + valid_dataset, + batch_size=batch_size, + shuffle=False, + num_workers=4, + pin_memory=True + ) + + dataset_classes = train_dataset.classes + + print(f"Data berhasil dimuat. Ditemukan {len(dataset_classes)} kelas: {dataset_classes}") + + return train_loader, valid_loader, dataset_classes \ No newline at end of file diff --git a/.history/src/dataset_20250926074509.py b/.history/src/dataset_20250926074509.py new file mode 100644 index 0000000000000000000000000000000000000000..48fa8c00bc3ef444490835751e562fbf5c2d85a5 --- /dev/null +++ b/.history/src/dataset_20250926074509.py @@ -0,0 +1,62 @@ +# src/dataset.py + +import torch +from torchvision import datasets, transforms +from torch.utils.data import DataLoader + +def get_transforms(image_size): + """ + Mendefinisikan pipeline augmentasi dan transformasi data. + Versi ini menggunakan augmentasi yang lebih kuat untuk mengurangi overfitting. 
+ """ + # Augmentasi agresif untuk data training + train_transform = transforms.Compose([ + transforms.Resize((image_size, image_size)), + transforms.RandomHorizontalFlip(p=0.5), + transforms.RandomVerticalFlip(p=0.5), + transforms.RandomRotation(45), + transforms.ColorJitter(brightness=0.3, contrast=0.3, saturation=0.3, hue=0.1), + transforms.RandomResizedCrop(image_size, scale=(0.8, 1.0), ratio=(0.9, 1.1)), + transforms.ToTensor(), + transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) + ]) + + # Untuk data validasi, tidak ada augmentasi + valid_transform = transforms.Compose([ + transforms.Resize((image_size, image_size)), + transforms.ToTensor(), + transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) + ]) + + return train_transform, valid_transform + +def get_dataloaders(data_dir, batch_size, image_size): + """ + Membuat DataLoaders untuk training dan validasi. + """ + train_transform, valid_transform = get_transforms(image_size) + + train_dataset = datasets.ImageFolder(f"{data_dir}/train", transform=train_transform) + valid_dataset = datasets.ImageFolder(f"{data_dir}/valid", transform=valid_transform) + + # Turunkan num_workers jika RAM Anda terbatas + train_loader = DataLoader( + train_dataset, + batch_size=batch_size, + shuffle=True, + num_workers=6, + pin_memory=True + ) + valid_loader = DataLoader( + valid_dataset, + batch_size=batch_size, + shuffle=False, + num_workers=4, + pin_memory=True + ) + + dataset_classes = train_dataset.classes + + print(f"Data berhasil dimuat. 
Ditemukan {len(dataset_classes)} kelas: {dataset_classes}") + + return train_loader, valid_loader, dataset_classes \ No newline at end of file diff --git a/.history/src/dataset_20250926074512.py b/.history/src/dataset_20250926074512.py new file mode 100644 index 0000000000000000000000000000000000000000..b4fc3b3c2f4f7167ef04d40890df8046bdcd0c26 --- /dev/null +++ b/.history/src/dataset_20250926074512.py @@ -0,0 +1,62 @@ +# src/dataset.py + +import torch +from torchvision import datasets, transforms +from torch.utils.data import DataLoader + +def get_transforms(image_size): + """ + Mendefinisikan pipeline augmentasi dan transformasi data. + Versi ini menggunakan augmentasi yang lebih kuat untuk mengurangi overfitting. + """ + # Augmentasi agresif untuk data training + train_transform = transforms.Compose([ + transforms.Resize((image_size, image_size)), + transforms.RandomHorizontalFlip(p=0.5), + transforms.RandomVerticalFlip(p=0.5), + transforms.RandomRotation(45), + transforms.ColorJitter(brightness=0.3, contrast=0.3, saturation=0.3, hue=0.1), + transforms.RandomResizedCrop(image_size, scale=(0.8, 1.0), ratio=(0.9, 1.1)), + transforms.ToTensor(), + transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) + ]) + + # Untuk data validasi, tidak ada augmentasi + valid_transform = transforms.Compose([ + transforms.Resize((image_size, image_size)), + transforms.ToTensor(), + transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) + ]) + + return train_transform, valid_transform + +def get_dataloaders(data_dir, batch_size, image_size): + """ + Membuat DataLoaders untuk training dan validasi. 
+ """ + train_transform, valid_transform = get_transforms(image_size) + + train_dataset = datasets.ImageFolder(f"{data_dir}/train", transform=train_transform) + valid_dataset = datasets.ImageFolder(f"{data_dir}/valid", transform=valid_transform) + + # Turunkan num_workers jika RAM Anda terbatas + train_loader = DataLoader( + train_dataset, + batch_size=batch_size, + shuffle=True, + num_workers=6, + pin_memory=True + ) + valid_loader = DataLoader( + valid_dataset, + batch_size=batch_size, + shuffle=False, + num_workers=6, + pin_memory=True + ) + + dataset_classes = train_dataset.classes + + print(f"Data berhasil dimuat. Ditemukan {len(dataset_classes)} kelas: {dataset_classes}") + + return train_loader, valid_loader, dataset_classes \ No newline at end of file diff --git a/.history/src/dataset_20250927095501.py b/.history/src/dataset_20250927095501.py new file mode 100644 index 0000000000000000000000000000000000000000..09f31fefdf1eba7e0391fcae4776c99df8420204 --- /dev/null +++ b/.history/src/dataset_20250927095501.py @@ -0,0 +1,62 @@ +# src/dataset.py + +import torch +from torchvision import datasets, transforms +from torch.utils.data import DataLoader + +def get_transforms(image_size): + """ + Mendefinisikan pipeline augmentasi dan transformasi data. + Versi ini menggunakan augmentasi yang lebih kuat untuk mengurangi overfitting. 
+ """ + # Augmentasi agresif untuk data training + train_transform = transforms.Compose([ + transforms.Resize((image_size, image_size)), + transforms.RandomHorizontalFlip(p=0.5), + transforms.RandomVerticalFlip(p=0.5), + transforms.RandomRotation(45), + transforms.ColorJitter(brightness=0.3, contrast=0.3, saturation=0.3, hue=0.1), + transforms.RandomResizedCrop(image_size, scale=(0.8, 1.0), ratio=(0.9, 1.1)), + transforms.ToTensor(), + transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) + ]) + + # Untuk data validasi, tidak ada augmentasi + valid_transform = transforms.Compose([ + transforms.Resize((image_size, image_size)), + transforms.ToTensor(), + transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) + ]) + + return train_transform, valid_transform + +def get_dataloaders(data_dir, batch_size, image_size): + """ + Membuat DataLoaders untuk training dan validasi. + """ + train_transform, valid_transform = get_transforms(image_size) + + train_dataset = datasets.ImageFolder(f"{data_dir}/train", transform=train_transform) + valid_dataset = datasets.ImageFolder(f"{data_dir}/valid", transform=valid_transform) + + # Turunkan num_workers jika RAM Anda terbatas + train_loader = DataLoader( + train_dataset, + batch_size=batch_size, + shuffle=True, + num_workers=6, + pin_memory=True + ) + valid_loader = DataLoader( + valid_dataset, + batch_size=batch_size, + shuffle=False, + num_workers=6, + pin_memory=True + ) + + dataset_classes = train_dataset.classes + + print(f"Data berhasil dimuat. 
Ditemukan {len(dataset_classes)} kelas: {dataset_classes}") + + return train_loader, valid_loader, train_dataset.classes, train_dataset \ No newline at end of file diff --git a/.history/src/dataset_20250927100215.py b/.history/src/dataset_20250927100215.py new file mode 100644 index 0000000000000000000000000000000000000000..e6f8593b153b251591acbe658657309717d94548 --- /dev/null +++ b/.history/src/dataset_20250927100215.py @@ -0,0 +1,63 @@ +# src/dataset.py + +import torch +from torchvision import datasets, transforms +from torch.utils.data import DataLoader +from utils import save_model, save_plots, save_confusion_matrix, FocalLoss + +def get_transforms(image_size): + """ + Mendefinisikan pipeline augmentasi dan transformasi data. + Versi ini menggunakan augmentasi yang lebih kuat untuk mengurangi overfitting. + """ + # Augmentasi agresif untuk data training + train_transform = transforms.Compose([ + transforms.Resize((image_size, image_size)), + transforms.RandomHorizontalFlip(p=0.5), + transforms.RandomVerticalFlip(p=0.5), + transforms.RandomRotation(45), + transforms.ColorJitter(brightness=0.3, contrast=0.3, saturation=0.3, hue=0.1), + transforms.RandomResizedCrop(image_size, scale=(0.8, 1.0), ratio=(0.9, 1.1)), + transforms.ToTensor(), + transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) + ]) + + # Untuk data validasi, tidak ada augmentasi + valid_transform = transforms.Compose([ + transforms.Resize((image_size, image_size)), + transforms.ToTensor(), + transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) + ]) + + return train_transform, valid_transform + +def get_dataloaders(data_dir, batch_size, image_size): + """ + Membuat DataLoaders untuk training dan validasi. 
+ """ + train_transform, valid_transform = get_transforms(image_size) + + train_dataset = datasets.ImageFolder(f"{data_dir}/train", transform=train_transform) + valid_dataset = datasets.ImageFolder(f"{data_dir}/valid", transform=valid_transform) + + # Turunkan num_workers jika RAM Anda terbatas + train_loader = DataLoader( + train_dataset, + batch_size=batch_size, + shuffle=True, + num_workers=6, + pin_memory=True + ) + valid_loader = DataLoader( + valid_dataset, + batch_size=batch_size, + shuffle=False, + num_workers=6, + pin_memory=True + ) + + dataset_classes = train_dataset.classes + + print(f"Data berhasil dimuat. Ditemukan {len(dataset_classes)} kelas: {dataset_classes}") + + return train_loader, valid_loader, train_dataset.classes, train_dataset \ No newline at end of file diff --git a/.history/src/dataset_20250927100222.py b/.history/src/dataset_20250927100222.py new file mode 100644 index 0000000000000000000000000000000000000000..c06dd402dd1e644d1b1ea102fba1c1cf876984dc --- /dev/null +++ b/.history/src/dataset_20250927100222.py @@ -0,0 +1,63 @@ +# src/dataset.py + +import torch +from torchvision import datasets, transforms +from torch.utils.data import DataLoader + + +def get_transforms(image_size): + """ + Mendefinisikan pipeline augmentasi dan transformasi data. + Versi ini menggunakan augmentasi yang lebih kuat untuk mengurangi overfitting. 
+ """ + # Augmentasi agresif untuk data training + train_transform = transforms.Compose([ + transforms.Resize((image_size, image_size)), + transforms.RandomHorizontalFlip(p=0.5), + transforms.RandomVerticalFlip(p=0.5), + transforms.RandomRotation(45), + transforms.ColorJitter(brightness=0.3, contrast=0.3, saturation=0.3, hue=0.1), + transforms.RandomResizedCrop(image_size, scale=(0.8, 1.0), ratio=(0.9, 1.1)), + transforms.ToTensor(), + transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) + ]) + + # Untuk data validasi, tidak ada augmentasi + valid_transform = transforms.Compose([ + transforms.Resize((image_size, image_size)), + transforms.ToTensor(), + transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) + ]) + + return train_transform, valid_transform + +def get_dataloaders(data_dir, batch_size, image_size): + """ + Membuat DataLoaders untuk training dan validasi. + """ + train_transform, valid_transform = get_transforms(image_size) + + train_dataset = datasets.ImageFolder(f"{data_dir}/train", transform=train_transform) + valid_dataset = datasets.ImageFolder(f"{data_dir}/valid", transform=valid_transform) + + # Turunkan num_workers jika RAM Anda terbatas + train_loader = DataLoader( + train_dataset, + batch_size=batch_size, + shuffle=True, + num_workers=6, + pin_memory=True + ) + valid_loader = DataLoader( + valid_dataset, + batch_size=batch_size, + shuffle=False, + num_workers=6, + pin_memory=True + ) + + dataset_classes = train_dataset.classes + + print(f"Data berhasil dimuat. 
Ditemukan {len(dataset_classes)} kelas: {dataset_classes}") + + return train_loader, valid_loader, train_dataset.classes, train_dataset \ No newline at end of file diff --git a/.history/src/dataset_20250927100524.py b/.history/src/dataset_20250927100524.py new file mode 100644 index 0000000000000000000000000000000000000000..2487c0a95b16f53808622ea6d5ffe1aad3e77137 --- /dev/null +++ b/.history/src/dataset_20250927100524.py @@ -0,0 +1,60 @@ +# src/dataset.py + +import torch +from torchvision import datasets, transforms +from torch.utils.data import DataLoader + +def get_transforms(image_size): + """ + Mendefinisikan pipeline augmentasi dan transformasi data. + Versi ini menggunakan augmentasi yang kuat untuk mengurangi overfitting. + """ + # Augmentasi agresif untuk data training + train_transform = transforms.Compose([ + transforms.Resize((image_size, image_size)), + transforms.RandomHorizontalFlip(p=0.5), + transforms.RandomVerticalFlip(p=0.5), + transforms.RandomRotation(45), + transforms.ColorJitter(brightness=0.3, contrast=0.3, saturation=0.3, hue=0.1), + transforms.RandomResizedCrop(image_size, scale=(0.8, 1.0), ratio=(0.9, 1.1)), + transforms.ToTensor(), + transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) + ]) + + # Untuk data validasi, tidak ada augmentasi + valid_transform = transforms.Compose([ + transforms.Resize((image_size, image_size)), + transforms.ToTensor(), + transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) + ]) + + return train_transform, valid_transform + +def get_dataloaders(data_dir, batch_size, image_size): + """ + Membuat DataLoaders untuk training dan validasi. 
+ """ + train_transform, valid_transform = get_transforms(image_size) + + train_dataset = datasets.ImageFolder(f"{data_dir}/train", transform=train_transform) + valid_dataset = datasets.ImageFolder(f"{data_dir}/valid", transform=valid_transform) + + # DataLoader untuk validasi tetap standar + valid_loader = DataLoader( + valid_dataset, + batch_size=batch_size, + shuffle=False, + num_workers=2, # Sesuaikan dengan kemampuan CPU + pin_memory=True + ) + + # DataLoader untuk training tidak dibuat di sini lagi, + # karena akan dibuat di train.py menggunakan sampler. + # Kita hanya perlu mengembalikan dataset-nya. + + dataset_classes = train_dataset.classes + + print(f"Data berhasil dimuat. Ditemukan {len(dataset_classes)} kelas: {dataset_classes}") + + # Mengembalikan dataset latih agar bisa dibuat sampler-nya di train.py + return None, valid_loader, dataset_classes, train_dataset \ No newline at end of file diff --git a/.history/src/dataset_20250927101252.py b/.history/src/dataset_20250927101252.py new file mode 100644 index 0000000000000000000000000000000000000000..7e0213a1e733dea588ba7126f737859757e8d09c --- /dev/null +++ b/.history/src/dataset_20250927101252.py @@ -0,0 +1,60 @@ +# src/dataset.py + +import torch +from torchvision import datasets, transforms +from torch.utils.data import DataLoader + +def get_transforms(image_size): + """ + Mendefinisikan pipeline augmentasi dan transformasi data. + Versi ini menggunakan augmentasi yang kuat untuk mengurangi overfitting. 
+ """ + # Augmentasi agresif untuk data training + train_transform = transforms.Compose([ + transforms.Resize((image_size, image_size)), + transforms.RandomHorizontalFlip(p=0.5), + transforms.RandomVerticalFlip(p=0.5), + transforms.RandomRotation(45), + transforms.ColorJitter(brightness=0.3, contrast=0.3, saturation=0.3, hue=0.1), + transforms.RandomResizedCrop(image_size, scale=(0.8, 1.0), ratio=(0.9, 1.1)), + transforms.ToTensor(), + transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) + ]) + + # Untuk data validasi, tidak ada augmentasi + valid_transform = transforms.Compose([ + transforms.Resize((image_size, image_size)), + transforms.ToTensor(), + transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) + ]) + + return train_transform, valid_transform + +def get_dataloaders(data_dir, batch_size, image_size): + """ + Membuat DataLoaders untuk training dan validasi. + """ + train_transform, valid_transform = get_transforms(image_size) + + train_dataset = datasets.ImageFolder(f"{data_dir}/train", transform=train_transform) + valid_dataset = datasets.ImageFolder(f"{data_dir}/valid", transform=valid_transform) + + # DataLoader untuk validasi tetap standar + valid_loader = DataLoader( + valid_dataset, + batch_size=batch_size, + shuffle=False, + num_workers=4, # Sesuaikan dengan kemampuan CPU + pin_memory=True + ) + + # DataLoader untuk training tidak dibuat di sini lagi, + # karena akan dibuat di train.py menggunakan sampler. + # Kita hanya perlu mengembalikan dataset-nya. + + dataset_classes = train_dataset.classes + + print(f"Data berhasil dimuat. 
Ditemukan {len(dataset_classes)} kelas: {dataset_classes}") + + # Mengembalikan dataset latih agar bisa dibuat sampler-nya di train.py + return None, valid_loader, dataset_classes, train_dataset \ No newline at end of file diff --git a/.history/src/dataset_20250927160452.py b/.history/src/dataset_20250927160452.py new file mode 100644 index 0000000000000000000000000000000000000000..96b5547a63d1f323c9a1a734ca3d6f2ab62bfcf7 --- /dev/null +++ b/.history/src/dataset_20250927160452.py @@ -0,0 +1,61 @@ +# src/dataset.py + +import torch +from torchvision import datasets, transforms +from torch.utils.data import DataLoader + +def get_transforms(image_size): + """ + Mendefinisikan pipeline augmentasi dan transformasi data. + Versi ini menggunakan augmentasi yang kuat untuk mengurangi overfitting. + """ + # Augmentasi agresif untuk data training + train_transform = transforms.Compose([ + transforms.Resize((image_size, image_size)), + transforms.RandomHorizontalFlip(p=0.5), + transforms.RandomVerticalFlip(p=0.5), + transforms.RandomRotation(45), + transforms.ColorJitter(brightness=0.3, contrast=0.3, saturation=0.3, hue=0.1), + transforms.RandomResizedCrop(image_size, scale=(0.8, 1.0), ratio=(0.9, 1.1)), + transforms.ToTensor(), + transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) + + ]) + + # Untuk data validasi, tidak ada augmentasi + valid_transform = transforms.Compose([ + transforms.Resize((image_size, image_size)), + transforms.ToTensor(), + transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) + ]) + + return train_transform, valid_transform + +def get_dataloaders(data_dir, batch_size, image_size): + """ + Membuat DataLoaders untuk training dan validasi. 
+ """ + train_transform, valid_transform = get_transforms(image_size) + + train_dataset = datasets.ImageFolder(f"{data_dir}/train", transform=train_transform) + valid_dataset = datasets.ImageFolder(f"{data_dir}/valid", transform=valid_transform) + + # DataLoader untuk validasi tetap standar + valid_loader = DataLoader( + valid_dataset, + batch_size=batch_size, + shuffle=False, + num_workers=4, # Sesuaikan dengan kemampuan CPU + pin_memory=True + ) + + # DataLoader untuk training tidak dibuat di sini lagi, + # karena akan dibuat di train.py menggunakan sampler. + # Kita hanya perlu mengembalikan dataset-nya. + + dataset_classes = train_dataset.classes + + print(f"Data berhasil dimuat. Ditemukan {len(dataset_classes)} kelas: {dataset_classes}") + + # Mengembalikan dataset latih agar bisa dibuat sampler-nya di train.py + return None, valid_loader, dataset_classes, train_dataset \ No newline at end of file diff --git a/.history/src/dataset_20250927160453.py b/.history/src/dataset_20250927160453.py new file mode 100644 index 0000000000000000000000000000000000000000..83e8b1d903c0c021764b7273ae78e31e9f384e51 --- /dev/null +++ b/.history/src/dataset_20250927160453.py @@ -0,0 +1,63 @@ +# src/dataset.py + +import torch +from torchvision import datasets, transforms +from torch.utils.data import DataLoader + +def get_transforms(image_size): + """ + Mendefinisikan pipeline augmentasi dan transformasi data. + Versi ini menggunakan augmentasi yang kuat untuk mengurangi overfitting. 
+ """ + # Augmentasi agresif untuk data training + train_transform = transforms.Compose([ + transforms.Resize((image_size, image_size)), + transforms.RandomHorizontalFlip(p=0.5), + transforms.RandomVerticalFlip(p=0.5), + transforms.RandomRotation(45), + transforms.ColorJitter(brightness=0.3, contrast=0.3, saturation=0.3, hue=0.1), + transforms.RandomResizedCrop(image_size, scale=(0.8, 1.0), ratio=(0.9, 1.1)), + transforms.ToTensor(), + transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) + transforms.RandomAdjustSharpness(sharpness_factor=2, p=0.3), +transforms.RandomAutocontrast(p=0.3), + + ]) + + # Untuk data validasi, tidak ada augmentasi + valid_transform = transforms.Compose([ + transforms.Resize((image_size, image_size)), + transforms.ToTensor(), + transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) + ]) + + return train_transform, valid_transform + +def get_dataloaders(data_dir, batch_size, image_size): + """ + Membuat DataLoaders untuk training dan validasi. + """ + train_transform, valid_transform = get_transforms(image_size) + + train_dataset = datasets.ImageFolder(f"{data_dir}/train", transform=train_transform) + valid_dataset = datasets.ImageFolder(f"{data_dir}/valid", transform=valid_transform) + + # DataLoader untuk validasi tetap standar + valid_loader = DataLoader( + valid_dataset, + batch_size=batch_size, + shuffle=False, + num_workers=4, # Sesuaikan dengan kemampuan CPU + pin_memory=True + ) + + # DataLoader untuk training tidak dibuat di sini lagi, + # karena akan dibuat di train.py menggunakan sampler. + # Kita hanya perlu mengembalikan dataset-nya. + + dataset_classes = train_dataset.classes + + print(f"Data berhasil dimuat. 
Ditemukan {len(dataset_classes)} kelas: {dataset_classes}") + + # Mengembalikan dataset latih agar bisa dibuat sampler-nya di train.py + return None, valid_loader, dataset_classes, train_dataset \ No newline at end of file diff --git a/.history/src/dataset_20250927160456.py b/.history/src/dataset_20250927160456.py new file mode 100644 index 0000000000000000000000000000000000000000..10abfab70719499c665e4c456b5a4660b897e1a3 --- /dev/null +++ b/.history/src/dataset_20250927160456.py @@ -0,0 +1,63 @@ +# src/dataset.py + +import torch +from torchvision import datasets, transforms +from torch.utils.data import DataLoader + +def get_transforms(image_size): + """ + Mendefinisikan pipeline augmentasi dan transformasi data. + Versi ini menggunakan augmentasi yang kuat untuk mengurangi overfitting. + """ + # Augmentasi agresif untuk data training + train_transform = transforms.Compose([ + transforms.Resize((image_size, image_size)), + transforms.RandomHorizontalFlip(p=0.5), + transforms.RandomVerticalFlip(p=0.5), + transforms.RandomRotation(45), + transforms.ColorJitter(brightness=0.3, contrast=0.3, saturation=0.3, hue=0.1), + transforms.RandomResizedCrop(image_size, scale=(0.8, 1.0), ratio=(0.9, 1.1)), + transforms.ToTensor(), + transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) + transforms.RandomAdjustSharpness(sharpness_factor=2, p=0.3), + transforms.RandomAutocontrast(p=0.3), + + ]) + + # Untuk data validasi, tidak ada augmentasi + valid_transform = transforms.Compose([ + transforms.Resize((image_size, image_size)), + transforms.ToTensor(), + transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) + ]) + + return train_transform, valid_transform + +def get_dataloaders(data_dir, batch_size, image_size): + """ + Membuat DataLoaders untuk training dan validasi. 
+ """ + train_transform, valid_transform = get_transforms(image_size) + + train_dataset = datasets.ImageFolder(f"{data_dir}/train", transform=train_transform) + valid_dataset = datasets.ImageFolder(f"{data_dir}/valid", transform=valid_transform) + + # DataLoader untuk validasi tetap standar + valid_loader = DataLoader( + valid_dataset, + batch_size=batch_size, + shuffle=False, + num_workers=4, # Sesuaikan dengan kemampuan CPU + pin_memory=True + ) + + # DataLoader untuk training tidak dibuat di sini lagi, + # karena akan dibuat di train.py menggunakan sampler. + # Kita hanya perlu mengembalikan dataset-nya. + + dataset_classes = train_dataset.classes + + print(f"Data berhasil dimuat. Ditemukan {len(dataset_classes)} kelas: {dataset_classes}") + + # Mengembalikan dataset latih agar bisa dibuat sampler-nya di train.py + return None, valid_loader, dataset_classes, train_dataset \ No newline at end of file diff --git a/.history/src/dataset_20250927160458.py b/.history/src/dataset_20250927160458.py new file mode 100644 index 0000000000000000000000000000000000000000..1ca0a652a84d2102d83533a4fe6bb8e9661608d1 --- /dev/null +++ b/.history/src/dataset_20250927160458.py @@ -0,0 +1,63 @@ +# src/dataset.py + +import torch +from torchvision import datasets, transforms +from torch.utils.data import DataLoader + +def get_transforms(image_size): + """ + Mendefinisikan pipeline augmentasi dan transformasi data. + Versi ini menggunakan augmentasi yang kuat untuk mengurangi overfitting. 
+ """ + # Augmentasi agresif untuk data training + train_transform = transforms.Compose([ + transforms.Resize((image_size, image_size)), + transforms.RandomHorizontalFlip(p=0.5), + transforms.RandomVerticalFlip(p=0.5), + transforms.RandomRotation(45), + transforms.ColorJitter(brightness=0.3, contrast=0.3, saturation=0.3, hue=0.1), + transforms.RandomResizedCrop(image_size, scale=(0.8, 1.0), ratio=(0.9, 1.1)), + transforms.ToTensor(), + transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) + transforms.RandomAdjustSharpness(sharpness_factor=2, p=0.3), + transforms.RandomAutocontrast(p=0.3), + + ]) + + # Untuk data validasi, tidak ada augmentasi + valid_transform = transforms.Compose([ + transforms.Resize((image_size, image_size)), + transforms.ToTensor(), + transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) + ]) + + return train_transform, valid_transform + +def get_dataloaders(data_dir, batch_size, image_size): + """ + Membuat DataLoaders untuk training dan validasi. + """ + train_transform, valid_transform = get_transforms(image_size) + + train_dataset = datasets.ImageFolder(f"{data_dir}/train", transform=train_transform) + valid_dataset = datasets.ImageFolder(f"{data_dir}/valid", transform=valid_transform) + + # DataLoader untuk validasi tetap standar + valid_loader = DataLoader( + valid_dataset, + batch_size=batch_size, + shuffle=False, + num_workers=4, # Sesuaikan dengan kemampuan CPU + pin_memory=True + ) + + # DataLoader untuk training tidak dibuat di sini lagi, + # karena akan dibuat di train.py menggunakan sampler. + # Kita hanya perlu mengembalikan dataset-nya. + + dataset_classes = train_dataset.classes + + print(f"Data berhasil dimuat. 
Ditemukan {len(dataset_classes)} kelas: {dataset_classes}") + + # Mengembalikan dataset latih agar bisa dibuat sampler-nya di train.py + return None, valid_loader, dataset_classes, train_dataset \ No newline at end of file diff --git a/.history/src/dataset_20250927160507.py b/.history/src/dataset_20250927160507.py new file mode 100644 index 0000000000000000000000000000000000000000..63ddd79857cee54520786bd39b90504dd18b7058 --- /dev/null +++ b/.history/src/dataset_20250927160507.py @@ -0,0 +1,63 @@ +# src/dataset.py + +import torch +from torchvision import datasets, transforms +from torch.utils.data import DataLoader + +def get_transforms(image_size): + """ + Mendefinisikan pipeline augmentasi dan transformasi data. + Versi ini menggunakan augmentasi yang kuat untuk mengurangi overfitting. + """ + # Augmentasi agresif untuk data training + train_transform = transforms.Compose([ + transforms.Resize((image_size, image_size)), + transforms.RandomHorizontalFlip(p=0.5), + transforms.RandomVerticalFlip(p=0.5), + transforms.RandomRotation(45), + transforms.ColorJitter(brightness=0.3, contrast=0.3, saturation=0.3, hue=0.1), + transforms.RandomResizedCrop(image_size, scale=(0.8, 1.0), ratio=(0.9, 1.1)), + transforms.ToTensor(), + transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) + transforms.RandomAdjustSharpness(sharpness_factor=2, p=0.3), + transforms.RandomAutocontrast(p=0.3) + + ]) + + # Untuk data validasi, tidak ada augmentasi + valid_transform = transforms.Compose([ + transforms.Resize((image_size, image_size)), + transforms.ToTensor(), + transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) + ]) + + return train_transform, valid_transform + +def get_dataloaders(data_dir, batch_size, image_size): + """ + Membuat DataLoaders untuk training dan validasi. 
+ """ + train_transform, valid_transform = get_transforms(image_size) + + train_dataset = datasets.ImageFolder(f"{data_dir}/train", transform=train_transform) + valid_dataset = datasets.ImageFolder(f"{data_dir}/valid", transform=valid_transform) + + # DataLoader untuk validasi tetap standar + valid_loader = DataLoader( + valid_dataset, + batch_size=batch_size, + shuffle=False, + num_workers=4, # Sesuaikan dengan kemampuan CPU + pin_memory=True + ) + + # DataLoader untuk training tidak dibuat di sini lagi, + # karena akan dibuat di train.py menggunakan sampler. + # Kita hanya perlu mengembalikan dataset-nya. + + dataset_classes = train_dataset.classes + + print(f"Data berhasil dimuat. Ditemukan {len(dataset_classes)} kelas: {dataset_classes}") + + # Mengembalikan dataset latih agar bisa dibuat sampler-nya di train.py + return None, valid_loader, dataset_classes, train_dataset \ No newline at end of file diff --git a/.history/src/dataset_20250927160511.py b/.history/src/dataset_20250927160511.py new file mode 100644 index 0000000000000000000000000000000000000000..96b5547a63d1f323c9a1a734ca3d6f2ab62bfcf7 --- /dev/null +++ b/.history/src/dataset_20250927160511.py @@ -0,0 +1,61 @@ +# src/dataset.py + +import torch +from torchvision import datasets, transforms +from torch.utils.data import DataLoader + +def get_transforms(image_size): + """ + Mendefinisikan pipeline augmentasi dan transformasi data. + Versi ini menggunakan augmentasi yang kuat untuk mengurangi overfitting. 
+ """ + # Augmentasi agresif untuk data training + train_transform = transforms.Compose([ + transforms.Resize((image_size, image_size)), + transforms.RandomHorizontalFlip(p=0.5), + transforms.RandomVerticalFlip(p=0.5), + transforms.RandomRotation(45), + transforms.ColorJitter(brightness=0.3, contrast=0.3, saturation=0.3, hue=0.1), + transforms.RandomResizedCrop(image_size, scale=(0.8, 1.0), ratio=(0.9, 1.1)), + transforms.ToTensor(), + transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) + + ]) + + # Untuk data validasi, tidak ada augmentasi + valid_transform = transforms.Compose([ + transforms.Resize((image_size, image_size)), + transforms.ToTensor(), + transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) + ]) + + return train_transform, valid_transform + +def get_dataloaders(data_dir, batch_size, image_size): + """ + Membuat DataLoaders untuk training dan validasi. + """ + train_transform, valid_transform = get_transforms(image_size) + + train_dataset = datasets.ImageFolder(f"{data_dir}/train", transform=train_transform) + valid_dataset = datasets.ImageFolder(f"{data_dir}/valid", transform=valid_transform) + + # DataLoader untuk validasi tetap standar + valid_loader = DataLoader( + valid_dataset, + batch_size=batch_size, + shuffle=False, + num_workers=4, # Sesuaikan dengan kemampuan CPU + pin_memory=True + ) + + # DataLoader untuk training tidak dibuat di sini lagi, + # karena akan dibuat di train.py menggunakan sampler. + # Kita hanya perlu mengembalikan dataset-nya. + + dataset_classes = train_dataset.classes + + print(f"Data berhasil dimuat. 
Ditemukan {len(dataset_classes)} kelas: {dataset_classes}") + + # Mengembalikan dataset latih agar bisa dibuat sampler-nya di train.py + return None, valid_loader, dataset_classes, train_dataset \ No newline at end of file diff --git a/.history/src/dataset_20250927160603.py b/.history/src/dataset_20250927160603.py new file mode 100644 index 0000000000000000000000000000000000000000..67662aa208648b0abd26c98ad5660a43ede01dce --- /dev/null +++ b/.history/src/dataset_20250927160603.py @@ -0,0 +1,58 @@ +# src/dataset.py + +import torch +from torchvision import datasets, transforms +from torch.utils.data import DataLoader + +def get_transforms(image_size): + """ + Mendefinisikan pipeline augmentasi dan transformasi data. + Versi ini menggunakan augmentasi yang kuat untuk mengurangi overfitting. + """ + # Augmentasi agresif untuk data training + train_transform = transforms.Compose([ + transforms.Resize((image_size, image_size)), + transforms.RandomHorizontalFlip(p=0.5), + transforms.RandomVerticalFlip(p=0.5), + transforms.RandomRotation(45), + transforms.ColorJitter(brightness=0.3, contrast=0.3, saturation=0.3, hue=0.1), + transforms.RandomResizedCrop(image_size, scale=(0.8, 1.0), ratio=(0.9, 1.1)), + transforms.RandomAdjustSharpness(sharpness_factor=2, p=0.3), # ✅ Tambahan baru + transforms.RandomAutocontrast(p=0.3), # ✅ Tambahan baru + transforms.ToTensor(), + transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) + ]) + + # Untuk data validasi, tidak ada augmentasi + valid_transform = transforms.Compose([ + transforms.Resize((image_size, image_size)), + transforms.ToTensor(), + transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) + ]) + + return train_transform, valid_transform + +def get_dataloaders(data_dir, batch_size, image_size): + """ + Membuat DataLoaders untuk training dan validasi. 
+ """ + train_transform, valid_transform = get_transforms(image_size) + + train_dataset = datasets.ImageFolder(f"{data_dir}/train", transform=train_transform) + valid_dataset = datasets.ImageFolder(f"{data_dir}/valid", transform=valid_transform) + + # DataLoader untuk validasi tetap standar + valid_loader = DataLoader( + valid_dataset, + batch_size=batch_size, + shuffle=False, + num_workers=4, # Sesuaikan dengan kemampuan CPU + pin_memory=True + ) + + dataset_classes = train_dataset.classes + + print(f"Data berhasil dimuat. Ditemukan {len(dataset_classes)} kelas: {dataset_classes}") + + # Training loader dibuat di train.py dengan sampler, jadi hanya return dataset + return None, valid_loader, dataset_classes, train_dataset diff --git a/.history/src/dataset_20250927163816.py b/.history/src/dataset_20250927163816.py new file mode 100644 index 0000000000000000000000000000000000000000..5eca5c72ad17d93824b1fea681f22a60cbd5e1cb --- /dev/null +++ b/.history/src/dataset_20250927163816.py @@ -0,0 +1,58 @@ +# src/dataset.py + +import torch +from torchvision import datasets, transforms +from torch.utils.data import DataLoader + +def get_transforms(image_size): + """ + Mendefinisikan pipeline augmentasi dan transformasi data. + Versi ini menggunakan augmentasi yang kuat untuk mengurangi overfitting. 
+ """ + # Augmentasi agresif untuk data training + train_transform = transforms.Compose([ + transforms.Resize((image_size, image_size)), + transforms.RandomHorizontalFlip(p=0.5), + transforms.RandomVerticalFlip(p=0.5), + transforms.RandomRotation(45), + transforms.ColorJitter(brightness=0.3, contrast=0.3, saturation=0.3, hue=0.1), + transforms.RandomResizedCrop(image_size, scale=(0.8, 1.0), ratio=(0.9, 1.1)), + transforms.RandomAdjustSharpness(sharpness_factor=2, p=0.3), # ✅ Tambahan baru + transforms.RandomAutocontrast(p=0.3), # ✅ Tambahan baru + transforms.ToTensor(), + transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) + ]) + + # Untuk data validasi, tidak ada augmentasi + valid_transform = transforms.Compose([ + transforms.Resize((image_size, image_size)), + transforms.ToTensor(), + transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) + ]) + + return train_transform, valid_transform + +def get_dataloaders(data_dir, batch_size, image_size): + """ + Membuat DataLoaders untuk training dan validasi. + """ + train_transform, valid_transform = get_transforms(image_size) + + train_dataset = datasets.ImageFolder(f"{data_dir}/train", transform=train_transform) + valid_dataset = datasets.ImageFolder(f"{data_dir}/valid", transform=valid_transform) + + # DataLoader untuk validasi tetap standar + valid_loader = DataLoader( + valid_dataset, + batch_size=batch_size, + shuffle=False, + num_workers=0, # Sesuaikan dengan kemampuan CPU + pin_memory=True + ) + + dataset_classes = train_dataset.classes + + print(f"Data berhasil dimuat. 
Ditemukan {len(dataset_classes)} kelas: {dataset_classes}") + + # Training loader dibuat di train.py dengan sampler, jadi hanya return dataset + return None, valid_loader, dataset_classes, train_dataset diff --git a/.history/src/dataset_20250927163837.py b/.history/src/dataset_20250927163837.py new file mode 100644 index 0000000000000000000000000000000000000000..724f9a576571b0927ae33ce23ebeb4e302810fe0 --- /dev/null +++ b/.history/src/dataset_20250927163837.py @@ -0,0 +1,64 @@ +# src/dataset.py + +import torch +from torchvision import datasets, transforms +from torch.utils.data import DataLoader + +def get_transforms(image_size): + """ + Mendefinisikan pipeline augmentasi dan transformasi data. + Versi ini menggunakan augmentasi yang kuat untuk mengurangi overfitting. + """ + # Augmentasi agresif untuk data training + train_transform = transforms.Compose([ + transforms.Resize((image_size, image_size)), + transforms.RandomHorizontalFlip(p=0.5), + transforms.RandomVerticalFlip(p=0.5), + transforms.RandomRotation(45), + transforms.ColorJitter(brightness=0.3, contrast=0.3, saturation=0.3, hue=0.1), + transforms.RandomResizedCrop(image_size, scale=(0.8, 1.0), ratio=(0.9, 1.1)), + transforms.RandomAdjustSharpness(sharpness_factor=2, p=0.3), # ✅ Tambahan baru + transforms.RandomAutocontrast(p=0.3), # ✅ Tambahan baru + transforms.ToTensor(), + transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) + ]) + + # Untuk data validasi, tidak ada augmentasi + valid_transform = transforms.Compose([ + transforms.Resize((image_size, image_size)), + transforms.ToTensor(), + transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) + ]) + + return train_transform, valid_transform + +def get_dataloaders(data_dir, batch_size, image_size): + """ + Membuat DataLoaders untuk training dan validasi. 
+ """ + train_transform, valid_transform = get_transforms(image_size) + + train_dataset = datasets.ImageFolder(f"{data_dir}/train", transform=train_transform) + valid_dataset = datasets.ImageFolder(f"{data_dir}/valid", transform=valid_transform) + train_loader = DataLoader( + train_dataset, + batch_size=batch_size, + sampler=train_sampler, + num_workers=0, + pin_memory=True +) + # DataLoader untuk validasi tetap standar + valid_loader = DataLoader( + valid_dataset, + batch_size=batch_size, + shuffle=False, + num_workers=0, # Sesuaikan dengan kemampuan CPU + pin_memory=True + ) + + dataset_classes = train_dataset.classes + + print(f"Data berhasil dimuat. Ditemukan {len(dataset_classes)} kelas: {dataset_classes}") + + # Training loader dibuat di train.py dengan sampler, jadi hanya return dataset + return None, valid_loader, dataset_classes, train_dataset diff --git a/.history/src/dataset_20250927163841.py b/.history/src/dataset_20250927163841.py new file mode 100644 index 0000000000000000000000000000000000000000..5eca5c72ad17d93824b1fea681f22a60cbd5e1cb --- /dev/null +++ b/.history/src/dataset_20250927163841.py @@ -0,0 +1,58 @@ +# src/dataset.py + +import torch +from torchvision import datasets, transforms +from torch.utils.data import DataLoader + +def get_transforms(image_size): + """ + Mendefinisikan pipeline augmentasi dan transformasi data. + Versi ini menggunakan augmentasi yang kuat untuk mengurangi overfitting. 
+ """ + # Augmentasi agresif untuk data training + train_transform = transforms.Compose([ + transforms.Resize((image_size, image_size)), + transforms.RandomHorizontalFlip(p=0.5), + transforms.RandomVerticalFlip(p=0.5), + transforms.RandomRotation(45), + transforms.ColorJitter(brightness=0.3, contrast=0.3, saturation=0.3, hue=0.1), + transforms.RandomResizedCrop(image_size, scale=(0.8, 1.0), ratio=(0.9, 1.1)), + transforms.RandomAdjustSharpness(sharpness_factor=2, p=0.3), # ✅ Tambahan baru + transforms.RandomAutocontrast(p=0.3), # ✅ Tambahan baru + transforms.ToTensor(), + transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) + ]) + + # Untuk data validasi, tidak ada augmentasi + valid_transform = transforms.Compose([ + transforms.Resize((image_size, image_size)), + transforms.ToTensor(), + transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) + ]) + + return train_transform, valid_transform + +def get_dataloaders(data_dir, batch_size, image_size): + """ + Membuat DataLoaders untuk training dan validasi. + """ + train_transform, valid_transform = get_transforms(image_size) + + train_dataset = datasets.ImageFolder(f"{data_dir}/train", transform=train_transform) + valid_dataset = datasets.ImageFolder(f"{data_dir}/valid", transform=valid_transform) + + # DataLoader untuk validasi tetap standar + valid_loader = DataLoader( + valid_dataset, + batch_size=batch_size, + shuffle=False, + num_workers=0, # Sesuaikan dengan kemampuan CPU + pin_memory=True + ) + + dataset_classes = train_dataset.classes + + print(f"Data berhasil dimuat. 
Ditemukan {len(dataset_classes)} kelas: {dataset_classes}") + + # Training loader dibuat di train.py dengan sampler, jadi hanya return dataset + return None, valid_loader, dataset_classes, train_dataset diff --git a/.history/src/dataset_20250927164606.py b/.history/src/dataset_20250927164606.py new file mode 100644 index 0000000000000000000000000000000000000000..e64228a37d84ce65a8289a75ba0a96da41a9b1f5 --- /dev/null +++ b/.history/src/dataset_20250927164606.py @@ -0,0 +1,54 @@ +# src/dataset.py + +import torch +from torchvision import datasets, transforms +from torch.utils.data import DataLoader + +def get_transforms(image_size): + """ + Mendefinisikan pipeline augmentasi dan transformasi data. + """ + # Augmentasi agresif untuk data training + train_transform = transforms.Compose([ + transforms.Resize((image_size, image_size)), + transforms.RandomHorizontalFlip(p=0.5), + transforms.RandomVerticalFlip(p=0.5), + transforms.RandomRotation(45), + transforms.ColorJitter(brightness=0.3, contrast=0.3, saturation=0.3, hue=0.1), + transforms.RandomResizedCrop(image_size, scale=(0.8, 1.0), ratio=(0.9, 1.1)), + transforms.ToTensor(), + transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) + ]) + + # Untuk data validasi, tidak ada augmentasi + valid_transform = transforms.Compose([ + transforms.Resize((image_size, image_size)), + transforms.ToTensor(), + transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) + ]) + + return train_transform, valid_transform + +def get_dataloaders(data_dir, batch_size, image_size, num_workers): + """ + Membuat DataLoaders untuk training dan validasi. 
+ """ + train_transform, valid_transform = get_transforms(image_size) + + train_dataset = datasets.ImageFolder(f"{data_dir}/train", transform=train_transform) + valid_dataset = datasets.ImageFolder(f"{data_dir}/valid", transform=valid_transform) + + valid_loader = DataLoader( + valid_dataset, + batch_size=batch_size, + shuffle=False, + num_workers=num_workers, + pin_memory=True + ) + + dataset_classes = train_dataset.classes + + print(f"Data berhasil dimuat. Ditemukan {len(dataset_classes)} kelas: {dataset_classes}") + + # Mengembalikan dataset latih agar bisa dibuat sampler-nya di train.py + return None, valid_loader, dataset_classes, train_dataset \ No newline at end of file diff --git a/.history/src/dataset_20250929121236.py b/.history/src/dataset_20250929121236.py new file mode 100644 index 0000000000000000000000000000000000000000..9dcaa97e768c9ba429a8341a9f5c8d5de128acfc --- /dev/null +++ b/.history/src/dataset_20250929121236.py @@ -0,0 +1,54 @@ +# src/dataset.py + +import torch +from torchvision import datasets, transforms +from torch.utils.data import DataLoader + +def get_transforms(image_size): + """ + Mendefinisikan pipeline augmentasi dan transformasi data. 
+ """ + # Augmentasi agresif untuk data training + train_transform = transforms.Compose([ + transforms.Resize((image_size, image_size)), + transforms.RandomHorizontalFlip(p=0.5), + transforms.RandomVerticalFlip(p=0.5), + transforms.RandomRotation(45), + transforms.ColorJitter(brightness=0.3, contrast=0.3, saturation=0.3, hue=0.1), + transforms.RandomResizedCrop(image_size, scale=(0.8, 1.0), ratio=(0.9, 1.1)), + transforms.ToTensor(), + transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) + ]) + + # data validasi, tidak ada augmentasi + valid_transform = transforms.Compose([ + transforms.Resize((image_size, image_size)), + transforms.ToTensor(), + transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) + ]) + + return train_transform, valid_transform + +def get_dataloaders(data_dir, batch_size, image_size, num_workers): + """ + Membuat DataLoaders untuk training dan validasi. + """ + train_transform, valid_transform = get_transforms(image_size) + + train_dataset = datasets.ImageFolder(f"{data_dir}/train", transform=train_transform) + valid_dataset = datasets.ImageFolder(f"{data_dir}/valid", transform=valid_transform) + + valid_loader = DataLoader( + valid_dataset, + batch_size=batch_size, + shuffle=False, + num_workers=num_workers, + pin_memory=True + ) + + dataset_classes = train_dataset.classes + + print(f"Data berhasil dimuat. 
Ditemukan {len(dataset_classes)} kelas: {dataset_classes}") + + # Mengembalikan dataset latih agar bisa dibuat sampler-nya di train.py + return None, valid_loader, dataset_classes, train_dataset \ No newline at end of file diff --git a/.history/src/dataset_20250929121241.py b/.history/src/dataset_20250929121241.py new file mode 100644 index 0000000000000000000000000000000000000000..da9cd73d8ea9aa5e5a34caa0b05f3d67e0cf5723 --- /dev/null +++ b/.history/src/dataset_20250929121241.py @@ -0,0 +1,54 @@ +# src/dataset.py + +import torch +from torchvision import datasets, transforms +from torch.utils.data import DataLoader + +def get_transforms(image_size): + """ + Mendefinisikan pipeline augmentasi dan transformasi data. + """ + # Augmentasi agresif untuk data training + train_transform = transforms.Compose([ + transforms.Resize((image_size, image_size)), + transforms.RandomHorizontalFlip(p=0.5), + transforms.RandomVerticalFlip(p=0.5), + transforms.RandomRotation(45), + transforms.ColorJitter(brightness=0.3, contrast=0.3, saturation=0.3, hue=0.1), + transforms.RandomResizedCrop(image_size, scale=(0.8, 1.0), ratio=(0.9, 1.1)), + transforms.ToTensor(), + transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) + ]) + + # data validasi + valid_transform = transforms.Compose([ + transforms.Resize((image_size, image_size)), + transforms.ToTensor(), + transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) + ]) + + return train_transform, valid_transform + +def get_dataloaders(data_dir, batch_size, image_size, num_workers): + """ + Membuat DataLoaders untuk training dan validasi. 
+ """ + train_transform, valid_transform = get_transforms(image_size) + + train_dataset = datasets.ImageFolder(f"{data_dir}/train", transform=train_transform) + valid_dataset = datasets.ImageFolder(f"{data_dir}/valid", transform=valid_transform) + + valid_loader = DataLoader( + valid_dataset, + batch_size=batch_size, + shuffle=False, + num_workers=num_workers, + pin_memory=True + ) + + dataset_classes = train_dataset.classes + + print(f"Data berhasil dimuat. Ditemukan {len(dataset_classes)} kelas: {dataset_classes}") + + # Mengembalikan dataset latih agar bisa dibuat sampler-nya di train.py + return None, valid_loader, dataset_classes, train_dataset \ No newline at end of file diff --git a/.history/src/dataset_20250929121302.py b/.history/src/dataset_20250929121302.py new file mode 100644 index 0000000000000000000000000000000000000000..f8e66c9cadd00be05ecd70d7c3c7941aef3f28c3 --- /dev/null +++ b/.history/src/dataset_20250929121302.py @@ -0,0 +1,54 @@ +# src/dataset.py + +import torch +from torchvision import datasets, transforms +from torch.utils.data import DataLoader + +def get_transforms(image_size): + """ + Mendefinisikan pipeline augmentasi dan transformasi data. 
+ """ + # Augmentasi agresif untuk data training + train_transform = transforms.Compose([ + transforms.Resize((image_size, image_size)), + transforms.RandomHorizontalFlip(p=0.5), + transforms.RandomVerticalFlip(p=0.5), + transforms.RandomRotation(,3945), + transforms.ColorJitter(brightness=0.3, contrast=0.3, saturation=0.3, hue=0.1), + transforms.RandomResizedCrop(image_size, scale=(0.8, 1.0), ratio=(0.9, 1.1)), + transforms.ToTensor(), + transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) + ]) + + # data validasi + valid_transform = transforms.Compose([ + transforms.Resize((image_size, image_size)), + transforms.ToTensor(), + transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) + ]) + + return train_transform, valid_transform + +def get_dataloaders(data_dir, batch_size, image_size, num_workers): + """ + Membuat DataLoaders untuk training dan validasi. + """ + train_transform, valid_transform = get_transforms(image_size) + + train_dataset = datasets.ImageFolder(f"{data_dir}/train", transform=train_transform) + valid_dataset = datasets.ImageFolder(f"{data_dir}/valid", transform=valid_transform) + + valid_loader = DataLoader( + valid_dataset, + batch_size=batch_size, + shuffle=False, + num_workers=num_workers, + pin_memory=True + ) + + dataset_classes = train_dataset.classes + + print(f"Data berhasil dimuat. 
Ditemukan {len(dataset_classes)} kelas: {dataset_classes}") + + # Mengembalikan dataset latih agar bisa dibuat sampler-nya di train.py + return None, valid_loader, dataset_classes, train_dataset \ No newline at end of file diff --git a/.history/src/dataset_20250929121304.py b/.history/src/dataset_20250929121304.py new file mode 100644 index 0000000000000000000000000000000000000000..da9cd73d8ea9aa5e5a34caa0b05f3d67e0cf5723 --- /dev/null +++ b/.history/src/dataset_20250929121304.py @@ -0,0 +1,54 @@ +# src/dataset.py + +import torch +from torchvision import datasets, transforms +from torch.utils.data import DataLoader + +def get_transforms(image_size): + """ + Mendefinisikan pipeline augmentasi dan transformasi data. + """ + # Augmentasi agresif untuk data training + train_transform = transforms.Compose([ + transforms.Resize((image_size, image_size)), + transforms.RandomHorizontalFlip(p=0.5), + transforms.RandomVerticalFlip(p=0.5), + transforms.RandomRotation(45), + transforms.ColorJitter(brightness=0.3, contrast=0.3, saturation=0.3, hue=0.1), + transforms.RandomResizedCrop(image_size, scale=(0.8, 1.0), ratio=(0.9, 1.1)), + transforms.ToTensor(), + transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) + ]) + + # data validasi + valid_transform = transforms.Compose([ + transforms.Resize((image_size, image_size)), + transforms.ToTensor(), + transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) + ]) + + return train_transform, valid_transform + +def get_dataloaders(data_dir, batch_size, image_size, num_workers): + """ + Membuat DataLoaders untuk training dan validasi. 
+ """ + train_transform, valid_transform = get_transforms(image_size) + + train_dataset = datasets.ImageFolder(f"{data_dir}/train", transform=train_transform) + valid_dataset = datasets.ImageFolder(f"{data_dir}/valid", transform=valid_transform) + + valid_loader = DataLoader( + valid_dataset, + batch_size=batch_size, + shuffle=False, + num_workers=num_workers, + pin_memory=True + ) + + dataset_classes = train_dataset.classes + + print(f"Data berhasil dimuat. Ditemukan {len(dataset_classes)} kelas: {dataset_classes}") + + # Mengembalikan dataset latih agar bisa dibuat sampler-nya di train.py + return None, valid_loader, dataset_classes, train_dataset \ No newline at end of file diff --git a/.history/src/dataset_20250929121306.py b/.history/src/dataset_20250929121306.py new file mode 100644 index 0000000000000000000000000000000000000000..e0e5265ce24d79abfa3d5cf36adeb1a5d6f01d2c --- /dev/null +++ b/.history/src/dataset_20250929121306.py @@ -0,0 +1,54 @@ +# src/dataset.py + +import torch +from torchvision import datasets, transforms +from torch.utils.data import DataLoader + +def get_transforms(image_size): + """ + Mendefinisikan pipeline augmentasi dan transformasi data. 
+ """ + # Augmentasi agresif untuk data training + train_transform = transforms.Compose([ + transforms.Resize((image_size, image_size)), + transforms.RandomHorizontalFlip(p=0.5), + transforms.RandomVerticalFlip(p=0.5), + transforms.RandomRotation(30,45), + transforms.ColorJitter(brightness=0.3, contrast=0.3, saturation=0.3, hue=0.1), + transforms.RandomResizedCrop(image_size, scale=(0.8, 1.0), ratio=(0.9, 1.1)), + transforms.ToTensor(), + transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) + ]) + + # data validasi + valid_transform = transforms.Compose([ + transforms.Resize((image_size, image_size)), + transforms.ToTensor(), + transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) + ]) + + return train_transform, valid_transform + +def get_dataloaders(data_dir, batch_size, image_size, num_workers): + """ + Membuat DataLoaders untuk training dan validasi. + """ + train_transform, valid_transform = get_transforms(image_size) + + train_dataset = datasets.ImageFolder(f"{data_dir}/train", transform=train_transform) + valid_dataset = datasets.ImageFolder(f"{data_dir}/valid", transform=valid_transform) + + valid_loader = DataLoader( + valid_dataset, + batch_size=batch_size, + shuffle=False, + num_workers=num_workers, + pin_memory=True + ) + + dataset_classes = train_dataset.classes + + print(f"Data berhasil dimuat. 
Ditemukan {len(dataset_classes)} kelas: {dataset_classes}") + + # Mengembalikan dataset latih agar bisa dibuat sampler-nya di train.py + return None, valid_loader, dataset_classes, train_dataset \ No newline at end of file diff --git a/.history/src/dataset_20250930185822.py b/.history/src/dataset_20250930185822.py new file mode 100644 index 0000000000000000000000000000000000000000..da9cd73d8ea9aa5e5a34caa0b05f3d67e0cf5723 --- /dev/null +++ b/.history/src/dataset_20250930185822.py @@ -0,0 +1,54 @@ +# src/dataset.py + +import torch +from torchvision import datasets, transforms +from torch.utils.data import DataLoader + +def get_transforms(image_size): + """ + Mendefinisikan pipeline augmentasi dan transformasi data. + """ + # Augmentasi agresif untuk data training + train_transform = transforms.Compose([ + transforms.Resize((image_size, image_size)), + transforms.RandomHorizontalFlip(p=0.5), + transforms.RandomVerticalFlip(p=0.5), + transforms.RandomRotation(45), + transforms.ColorJitter(brightness=0.3, contrast=0.3, saturation=0.3, hue=0.1), + transforms.RandomResizedCrop(image_size, scale=(0.8, 1.0), ratio=(0.9, 1.1)), + transforms.ToTensor(), + transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) + ]) + + # data validasi + valid_transform = transforms.Compose([ + transforms.Resize((image_size, image_size)), + transforms.ToTensor(), + transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) + ]) + + return train_transform, valid_transform + +def get_dataloaders(data_dir, batch_size, image_size, num_workers): + """ + Membuat DataLoaders untuk training dan validasi. 
+ """ + train_transform, valid_transform = get_transforms(image_size) + + train_dataset = datasets.ImageFolder(f"{data_dir}/train", transform=train_transform) + valid_dataset = datasets.ImageFolder(f"{data_dir}/valid", transform=valid_transform) + + valid_loader = DataLoader( + valid_dataset, + batch_size=batch_size, + shuffle=False, + num_workers=num_workers, + pin_memory=True + ) + + dataset_classes = train_dataset.classes + + print(f"Data berhasil dimuat. Ditemukan {len(dataset_classes)} kelas: {dataset_classes}") + + # Mengembalikan dataset latih agar bisa dibuat sampler-nya di train.py + return None, valid_loader, dataset_classes, train_dataset \ No newline at end of file diff --git a/.history/src/dataset_20250930190151.py b/.history/src/dataset_20250930190151.py new file mode 100644 index 0000000000000000000000000000000000000000..3323db914a49ddf852214dec523ea82d6fb5bfc6 --- /dev/null +++ b/.history/src/dataset_20250930190151.py @@ -0,0 +1,29 @@ +# src/dataset.py + +import torch +from torchvision import datasets, transforms +from torch.utils.data import DataLoader + +def get_transforms(image_size): + """ + Pipeline augmentasi & transformasi data. 
+ """ + train_transform = transforms.Compose([ + transforms.Resize((image_size, image_size)), + transforms.RandomHorizontalFlip(p=0.5), + transforms.RandomVerticalFlip(p=0.5), + transforms.RandomRotation(degrees=(-45, 45)), # perbaikan + transforms.ColorJitter(brightness=0.3, contrast=0.3, saturation=0.3, hue=0.1), + transforms.RandomResizedCrop(image_size, scale=(0.8, 1.0), ratio=(0.9, 1.1)), + transforms.GaussianBlur(kernel_size=3, sigma=(0.1, 2.0)), # tambahan + transforms.ToTensor(), + transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) + ]) + + valid_transform = transforms.Compose([ + transforms.Resize((image_size, image_size)), + transforms.ToTensor(), + transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) + ]) + + return train_transform, valid_transform diff --git a/.history/src/dataset_20250930190404.py b/.history/src/dataset_20250930190404.py new file mode 100644 index 0000000000000000000000000000000000000000..8060ddc955bc30ef707491121e7a3d370998b730 --- /dev/null +++ b/.history/src/dataset_20250930190404.py @@ -0,0 +1,45 @@ +import os +from torch.utils.data import DataLoader, WeightedRandomSampler +from torchvision import datasets, transforms +import numpy as np +from collections import Counter + +def get_dataloaders(data_dir, batch_size=32, image_size=224, num_workers=2): + # Augmentasi training + train_transform = transforms.Compose([ + transforms.RandomHorizontalFlip(), + transforms.RandomVerticalFlip(), + transforms.RandomRotation(degrees=(-45, 45)), + transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1), + transforms.RandomResizedCrop(image_size, scale=(0.8, 1.0)), + transforms.ToTensor(), + transforms.Normalize(mean=[0.485, 0.456, 0.406], + std=[0.229, 0.224, 0.225]), + ]) + + # Augmentasi validasi lebih ringan + valid_transform = transforms.Compose([ + transforms.Resize((image_size, image_size)), + transforms.ToTensor(), + transforms.Normalize(mean=[0.485, 0.456, 0.406], + 
std=[0.229, 0.224, 0.225]), + ]) + + # Load dataset + train_dataset = datasets.ImageFolder(os.path.join(data_dir, "train"), transform=train_transform) + valid_dataset = datasets.ImageFolder(os.path.join(data_dir, "valid"), transform=valid_transform) + + # Hitung distribusi class untuk WeightedRandomSampler + class_counts = Counter([label for _, label in train_dataset.samples]) + class_weights = {cls: 1.0 / count for cls, count in class_counts.items()} + sample_weights = [class_weights[label] for _, label in train_dataset.samples] + + sampler = WeightedRandomSampler(weights=sample_weights, num_samples=len(sample_weights), replacement=True) + + # DataLoader + train_loader = DataLoader(train_dataset, batch_size=batch_size, sampler=sampler, + num_workers=num_workers, pin_memory=True) + valid_loader = DataLoader(valid_dataset, batch_size=batch_size, shuffle=False, + num_workers=num_workers, pin_memory=True) + + return train_loader, valid_loader, train_dataset.classes, train_dataset diff --git a/.history/src/dataset_20251001110207.py b/.history/src/dataset_20251001110207.py new file mode 100644 index 0000000000000000000000000000000000000000..a42bd83fd1fcd87d486c232a411dab5a53b58614 --- /dev/null +++ b/.history/src/dataset_20251001110207.py @@ -0,0 +1,45 @@ +import os +from torch.utils.data import DataLoader, WeightedRandomSampler +from torchvision import datasets, transforms +import numpy as np +from collections import Counter + +def get_dataloaders(data_dir, batch_size=32, image_size=224, num_workers=2): + # Augmentasi training + train_transform = transforms.Compose([ + transforms.RandomHorizontalFlip(), + transforms.RandomVerticalFlip(), + transforms.RandomRotation(degrees=(20)), + transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1), + transforms.RandomResizedCrop(image_size, scale=(0.8, 1.0)), + transforms.ToTensor(), + transforms.Normalize(mean=[0.485, 0.456, 0.406], + std=[0.229, 0.224, 0.225]), + ]) + + # Augmentasi validasi lebih ringan + 
valid_transform = transforms.Compose([ + transforms.Resize((image_size, image_size)), + transforms.ToTensor(), + transforms.Normalize(mean=[0.485, 0.456, 0.406], + std=[0.229, 0.224, 0.225]), + ]) + + # Load dataset + train_dataset = datasets.ImageFolder(os.path.join(data_dir, "train"), transform=train_transform) + valid_dataset = datasets.ImageFolder(os.path.join(data_dir, "valid"), transform=valid_transform) + + # Hitung distribusi class untuk WeightedRandomSampler + class_counts = Counter([label for _, label in train_dataset.samples]) + class_weights = {cls: 1.0 / count for cls, count in class_counts.items()} + sample_weights = [class_weights[label] for _, label in train_dataset.samples] + + sampler = WeightedRandomSampler(weights=sample_weights, num_samples=len(sample_weights), replacement=True) + + # DataLoader + train_loader = DataLoader(train_dataset, batch_size=batch_size, sampler=sampler, + num_workers=num_workers, pin_memory=True) + valid_loader = DataLoader(valid_dataset, batch_size=batch_size, shuffle=False, + num_workers=num_workers, pin_memory=True) + + return train_loader, valid_loader, train_dataset.classes, train_dataset diff --git a/.history/src/dataset_20251001110212.py b/.history/src/dataset_20251001110212.py new file mode 100644 index 0000000000000000000000000000000000000000..eb155f472dba8ee1c8063296233a3295b888f28d --- /dev/null +++ b/.history/src/dataset_20251001110212.py @@ -0,0 +1,45 @@ +import os +from torch.utils.data import DataLoader, WeightedRandomSampler +from torchvision import datasets, transforms +import numpy as np +from collections import Counter + +def get_dataloaders(data_dir, batch_size=32, image_size=224, num_workers=2): + # Augmentasi training + train_transform = transforms.Compose([ + transforms.RandomHorizontalFlip(), + transforms.RandomVerticalFlip(), + transforms.RandomRotation(degrees=(20)), + transforms.ColorJitter(brightness=0.3, contrast=0.2, saturation=0.2, hue=0.1), + transforms.RandomResizedCrop(image_size, 
scale=(0.8, 1.0)), + transforms.ToTensor(), + transforms.Normalize(mean=[0.485, 0.456, 0.406], + std=[0.229, 0.224, 0.225]), + ]) + + # Augmentasi validasi lebih ringan + valid_transform = transforms.Compose([ + transforms.Resize((image_size, image_size)), + transforms.ToTensor(), + transforms.Normalize(mean=[0.485, 0.456, 0.406], + std=[0.229, 0.224, 0.225]), + ]) + + # Load dataset + train_dataset = datasets.ImageFolder(os.path.join(data_dir, "train"), transform=train_transform) + valid_dataset = datasets.ImageFolder(os.path.join(data_dir, "valid"), transform=valid_transform) + + # Hitung distribusi class untuk WeightedRandomSampler + class_counts = Counter([label for _, label in train_dataset.samples]) + class_weights = {cls: 1.0 / count for cls, count in class_counts.items()} + sample_weights = [class_weights[label] for _, label in train_dataset.samples] + + sampler = WeightedRandomSampler(weights=sample_weights, num_samples=len(sample_weights), replacement=True) + + # DataLoader + train_loader = DataLoader(train_dataset, batch_size=batch_size, sampler=sampler, + num_workers=num_workers, pin_memory=True) + valid_loader = DataLoader(valid_dataset, batch_size=batch_size, shuffle=False, + num_workers=num_workers, pin_memory=True) + + return train_loader, valid_loader, train_dataset.classes, train_dataset diff --git a/.history/src/dataset_20251001110217.py b/.history/src/dataset_20251001110217.py new file mode 100644 index 0000000000000000000000000000000000000000..aee9e41ba9e337f61c9a3d7429c5e979ff51cc03 --- /dev/null +++ b/.history/src/dataset_20251001110217.py @@ -0,0 +1,45 @@ +import os +from torch.utils.data import DataLoader, WeightedRandomSampler +from torchvision import datasets, transforms +import numpy as np +from collections import Counter + +def get_dataloaders(data_dir, batch_size=32, image_size=224, num_workers=2): + # Augmentasi training + train_transform = transforms.Compose([ + transforms.RandomHorizontalFlip(), + transforms.RandomVerticalFlip(), + 
transforms.RandomRotation(degrees=(20)), + transforms.ColorJitter(brightness=0.3, contrast=0.3, saturation=0.2, hue=0.1), + transforms.RandomResizedCrop(image_size, scale=(0.8, 1.0)), + transforms.ToTensor(), + transforms.Normalize(mean=[0.485, 0.456, 0.406], + std=[0.229, 0.224, 0.225]), + ]) + + # Augmentasi validasi lebih ringan + valid_transform = transforms.Compose([ + transforms.Resize((image_size, image_size)), + transforms.ToTensor(), + transforms.Normalize(mean=[0.485, 0.456, 0.406], + std=[0.229, 0.224, 0.225]), + ]) + + # Load dataset + train_dataset = datasets.ImageFolder(os.path.join(data_dir, "train"), transform=train_transform) + valid_dataset = datasets.ImageFolder(os.path.join(data_dir, "valid"), transform=valid_transform) + + # Hitung distribusi class untuk WeightedRandomSampler + class_counts = Counter([label for _, label in train_dataset.samples]) + class_weights = {cls: 1.0 / count for cls, count in class_counts.items()} + sample_weights = [class_weights[label] for _, label in train_dataset.samples] + + sampler = WeightedRandomSampler(weights=sample_weights, num_samples=len(sample_weights), replacement=True) + + # DataLoader + train_loader = DataLoader(train_dataset, batch_size=batch_size, sampler=sampler, + num_workers=num_workers, pin_memory=True) + valid_loader = DataLoader(valid_dataset, batch_size=batch_size, shuffle=False, + num_workers=num_workers, pin_memory=True) + + return train_loader, valid_loader, train_dataset.classes, train_dataset diff --git a/.history/src/dataset_20251001110221.py b/.history/src/dataset_20251001110221.py new file mode 100644 index 0000000000000000000000000000000000000000..d5397c7f53ec13dc3eefa2b4fe93b963bfe19787 --- /dev/null +++ b/.history/src/dataset_20251001110221.py @@ -0,0 +1,45 @@ +import os +from torch.utils.data import DataLoader, WeightedRandomSampler +from torchvision import datasets, transforms +import numpy as np +from collections import Counter + +def get_dataloaders(data_dir, batch_size=32, 
image_size=224, num_workers=2): + # Augmentasi training + train_transform = transforms.Compose([ + transforms.RandomHorizontalFlip(), + transforms.RandomVerticalFlip(), + transforms.RandomRotation(degrees=(20)), + transforms.ColorJitter(brightness=0.3, contrast=0.3, saturation=0.3, hue=0.1), + transforms.RandomResizedCrop(image_size, scale=(0.8, 1.0)), + transforms.ToTensor(), + transforms.Normalize(mean=[0.485, 0.456, 0.406], + std=[0.229, 0.224, 0.225]), + ]) + + # Augmentasi validasi lebih ringan + valid_transform = transforms.Compose([ + transforms.Resize((image_size, image_size)), + transforms.ToTensor(), + transforms.Normalize(mean=[0.485, 0.456, 0.406], + std=[0.229, 0.224, 0.225]), + ]) + + # Load dataset + train_dataset = datasets.ImageFolder(os.path.join(data_dir, "train"), transform=train_transform) + valid_dataset = datasets.ImageFolder(os.path.join(data_dir, "valid"), transform=valid_transform) + + # Hitung distribusi class untuk WeightedRandomSampler + class_counts = Counter([label for _, label in train_dataset.samples]) + class_weights = {cls: 1.0 / count for cls, count in class_counts.items()} + sample_weights = [class_weights[label] for _, label in train_dataset.samples] + + sampler = WeightedRandomSampler(weights=sample_weights, num_samples=len(sample_weights), replacement=True) + + # DataLoader + train_loader = DataLoader(train_dataset, batch_size=batch_size, sampler=sampler, + num_workers=num_workers, pin_memory=True) + valid_loader = DataLoader(valid_dataset, batch_size=batch_size, shuffle=False, + num_workers=num_workers, pin_memory=True) + + return train_loader, valid_loader, train_dataset.classes, train_dataset diff --git a/.history/src/dataset_20251001110243.py b/.history/src/dataset_20251001110243.py new file mode 100644 index 0000000000000000000000000000000000000000..f546957b7ec3f7b2853a9efa4c734d6669409e8e --- /dev/null +++ b/.history/src/dataset_20251001110243.py @@ -0,0 +1,46 @@ +import os +from torch.utils.data import DataLoader, 
WeightedRandomSampler +from torchvision import datasets, transforms +import numpy as np +from collections import Counter + +def get_dataloaders(data_dir, batch_size=32, image_size=224, num_workers=2): + # Augmentasi training + train_transform = transforms.Compose([ + transforms.RandomHorizontalFlip(), + transforms.RandomVerticalFlip(), + transforms.RandomRotation(degrees=(20)), + transforms.ColorJitter(brightness=0.3, contrast=0.3, saturation=0.3, hue=0.1), + transforms.RandomAffine(degrees=0, translate=(0.1, 0.1), shear=10), + transforms.RandomResizedCrop(image_size, scale=(0.8, 1.0)), + transforms.ToTensor(), + transforms.Normalize(mean=[0.485, 0.456, 0.406], + std=[0.229, 0.224, 0.225]), + ]) + + # Augmentasi validasi lebih ringan + valid_transform = transforms.Compose([ + transforms.Resize((image_size, image_size)), + transforms.ToTensor(), + transforms.Normalize(mean=[0.485, 0.456, 0.406], + std=[0.229, 0.224, 0.225]), + ]) + + # Load dataset + train_dataset = datasets.ImageFolder(os.path.join(data_dir, "train"), transform=train_transform) + valid_dataset = datasets.ImageFolder(os.path.join(data_dir, "valid"), transform=valid_transform) + + # Hitung distribusi class untuk WeightedRandomSampler + class_counts = Counter([label for _, label in train_dataset.samples]) + class_weights = {cls: 1.0 / count for cls, count in class_counts.items()} + sample_weights = [class_weights[label] for _, label in train_dataset.samples] + + sampler = WeightedRandomSampler(weights=sample_weights, num_samples=len(sample_weights), replacement=True) + + # DataLoader + train_loader = DataLoader(train_dataset, batch_size=batch_size, sampler=sampler, + num_workers=num_workers, pin_memory=True) + valid_loader = DataLoader(valid_dataset, batch_size=batch_size, shuffle=False, + num_workers=num_workers, pin_memory=True) + + return train_loader, valid_loader, train_dataset.classes, train_dataset diff --git a/.history/src/dataset_20251001110244.py b/.history/src/dataset_20251001110244.py new 
file mode 100644 index 0000000000000000000000000000000000000000..4c1253498f1d402156e4fc8c7e7b2ceda0c8ec3f --- /dev/null +++ b/.history/src/dataset_20251001110244.py @@ -0,0 +1,47 @@ +import os +from torch.utils.data import DataLoader, WeightedRandomSampler +from torchvision import datasets, transforms +import numpy as np +from collections import Counter + +def get_dataloaders(data_dir, batch_size=32, image_size=224, num_workers=2): + # Augmentasi training + train_transform = transforms.Compose([ + transforms.RandomHorizontalFlip(), + transforms.RandomVerticalFlip(), + transforms.RandomRotation(degrees=(20)), + transforms.ColorJitter(brightness=0.3, contrast=0.3, saturation=0.3, hue=0.1), + transforms.RandomAffine(degrees=0, translate=(0.1, 0.1), shear=10), + + transforms.RandomResizedCrop(image_size, scale=(0.8, 1.0)), + transforms.ToTensor(), + transforms.Normalize(mean=[0.485, 0.456, 0.406], + std=[0.229, 0.224, 0.225]), + ]) + + # Augmentasi validasi lebih ringan + valid_transform = transforms.Compose([ + transforms.Resize((image_size, image_size)), + transforms.ToTensor(), + transforms.Normalize(mean=[0.485, 0.456, 0.406], + std=[0.229, 0.224, 0.225]), + ]) + + # Load dataset + train_dataset = datasets.ImageFolder(os.path.join(data_dir, "train"), transform=train_transform) + valid_dataset = datasets.ImageFolder(os.path.join(data_dir, "valid"), transform=valid_transform) + + # Hitung distribusi class untuk WeightedRandomSampler + class_counts = Counter([label for _, label in train_dataset.samples]) + class_weights = {cls: 1.0 / count for cls, count in class_counts.items()} + sample_weights = [class_weights[label] for _, label in train_dataset.samples] + + sampler = WeightedRandomSampler(weights=sample_weights, num_samples=len(sample_weights), replacement=True) + + # DataLoader + train_loader = DataLoader(train_dataset, batch_size=batch_size, sampler=sampler, + num_workers=num_workers, pin_memory=True) + valid_loader = DataLoader(valid_dataset, 
batch_size=batch_size, shuffle=False, + num_workers=num_workers, pin_memory=True) + + return train_loader, valid_loader, train_dataset.classes, train_dataset diff --git a/.history/src/dataset_20251001110250.py b/.history/src/dataset_20251001110250.py new file mode 100644 index 0000000000000000000000000000000000000000..b9d58979080c85f0e5b472fcf26ad56ef063e77c --- /dev/null +++ b/.history/src/dataset_20251001110250.py @@ -0,0 +1,47 @@ +import os +from torch.utils.data import DataLoader, WeightedRandomSampler +from torchvision import datasets, transforms +import numpy as np +from collections import Counter + +def get_dataloaders(data_dir, batch_size=32, image_size=224, num_workers=2): + # Augmentasi training + train_transform = transforms.Compose([ + transforms.RandomHorizontalFlip(), + transforms.RandomVerticalFlip(), + transforms.RandomRotation(degrees=(20)), + transforms.ColorJitter(brightness=0.3, contrast=0.3, saturation=0.3, hue=0.1), + transforms.RandomAffine(degrees=0, translate=(0.1, 0.1), shear=10), + transforms.RandomErasing(p=0.5, scale=(0.02, 0.15)) + transforms.RandomResizedCrop(image_size, scale=(0.8, 1.0)), + transforms.ToTensor(), + transforms.Normalize(mean=[0.485, 0.456, 0.406], + std=[0.229, 0.224, 0.225]), + ]) + + # Augmentasi validasi lebih ringan + valid_transform = transforms.Compose([ + transforms.Resize((image_size, image_size)), + transforms.ToTensor(), + transforms.Normalize(mean=[0.485, 0.456, 0.406], + std=[0.229, 0.224, 0.225]), + ]) + + # Load dataset + train_dataset = datasets.ImageFolder(os.path.join(data_dir, "train"), transform=train_transform) + valid_dataset = datasets.ImageFolder(os.path.join(data_dir, "valid"), transform=valid_transform) + + # Hitung distribusi class untuk WeightedRandomSampler + class_counts = Counter([label for _, label in train_dataset.samples]) + class_weights = {cls: 1.0 / count for cls, count in class_counts.items()} + sample_weights = [class_weights[label] for _, label in train_dataset.samples] + + 
sampler = WeightedRandomSampler(weights=sample_weights, num_samples=len(sample_weights), replacement=True) + + # DataLoader + train_loader = DataLoader(train_dataset, batch_size=batch_size, sampler=sampler, + num_workers=num_workers, pin_memory=True) + valid_loader = DataLoader(valid_dataset, batch_size=batch_size, shuffle=False, + num_workers=num_workers, pin_memory=True) + + return train_loader, valid_loader, train_dataset.classes, train_dataset diff --git a/.history/src/dataset_20251001110254.py b/.history/src/dataset_20251001110254.py new file mode 100644 index 0000000000000000000000000000000000000000..10b66d5fa1f4ee1941096a9aaf7fc4dac7050d56 --- /dev/null +++ b/.history/src/dataset_20251001110254.py @@ -0,0 +1,47 @@ +import os +from torch.utils.data import DataLoader, WeightedRandomSampler +from torchvision import datasets, transforms +import numpy as np +from collections import Counter + +def get_dataloaders(data_dir, batch_size=32, image_size=224, num_workers=2): + # Augmentasi training + train_transform = transforms.Compose([ + transforms.RandomHorizontalFlip(), + transforms.RandomVerticalFlip(), + transforms.RandomRotation(degrees=(20)), + transforms.ColorJitter(brightness=0.3, contrast=0.3, saturation=0.3, hue=0.1), + transforms.RandomAffine(degrees=0, translate=(0.1, 0.1), shear=10), + transforms.RandomErasing(p=0.5, scale=(0.02, 0.15)), + transforms.RandomResizedCrop(image_size, scale=(0.8, 1.0)), + transforms.ToTensor(), + transforms.Normalize(mean=[0.485, 0.456, 0.406], + std=[0.229, 0.224, 0.225]), + ]) + + # Augmentasi validasi lebih ringan + valid_transform = transforms.Compose([ + transforms.Resize((image_size, image_size)), + transforms.ToTensor(), + transforms.Normalize(mean=[0.485, 0.456, 0.406], + std=[0.229, 0.224, 0.225]), + ]) + + # Load dataset + train_dataset = datasets.ImageFolder(os.path.join(data_dir, "train"), transform=train_transform) + valid_dataset = datasets.ImageFolder(os.path.join(data_dir, "valid"), transform=valid_transform) 
+ + # Hitung distribusi class untuk WeightedRandomSampler + class_counts = Counter([label for _, label in train_dataset.samples]) + class_weights = {cls: 1.0 / count for cls, count in class_counts.items()} + sample_weights = [class_weights[label] for _, label in train_dataset.samples] + + sampler = WeightedRandomSampler(weights=sample_weights, num_samples=len(sample_weights), replacement=True) + + # DataLoader + train_loader = DataLoader(train_dataset, batch_size=batch_size, sampler=sampler, + num_workers=num_workers, pin_memory=True) + valid_loader = DataLoader(valid_dataset, batch_size=batch_size, shuffle=False, + num_workers=num_workers, pin_memory=True) + + return train_loader, valid_loader, train_dataset.classes, train_dataset diff --git a/.history/src/dataset_20251001110412.py b/.history/src/dataset_20251001110412.py new file mode 100644 index 0000000000000000000000000000000000000000..1ffc0964d8df09ebe60ea7fd7e8916cff7fbee4e --- /dev/null +++ b/.history/src/dataset_20251001110412.py @@ -0,0 +1,50 @@ +import os +from torch.utils.data import DataLoader, WeightedRandomSampler +from torchvision import datasets, transforms +import numpy as np +from collections import Counter + +def get_dataloaders(data_dir, batch_size=32, image_size=224, num_workers=2): + # Augmentasi training + train_transform = transforms.Compose([ + transforms.RandomHorizontalFlip(), + transforms.RandomVerticalFlip(), + transforms.RandomRotation(degrees=(20)), + transforms.ColorJitter(brightness=0.3, contrast=0.3, saturation=0.3, hue=0.1), + transforms.RandomAffine(degrees=0, translate=(0.1, 0.1), shear=10), + transforms.RandomErasing(p=0.5, scale=(0.02, 0.15)), + transforms.RandomResizedCrop(image_size, scale=(0.8, 1.0)), + transforms.ToTensor(), + transforms.Normalize(mean=[0.485, 0.456, 0.406], + std=[0.229, 0.224, 0.225]), + ]) + + # Augmentasi validasi lebih ringan + valid_transform = transforms.Compose([ + transforms.ColorJitter(brightness=0.3, contrast=0.3, saturation=0.3, hue=0.1), + 
transforms.RandomAffine(degrees=0, translate=(0.1, 0.1), shear=10), + transforms.RandomErasing(p=0.5, scale=(0.02, 0.15)), + transforms.Resize((image_size, image_size)), + transforms.ToTensor(), + transforms.Normalize(mean=[0.485, 0.456, 0.406], + std=[0.229, 0.224, 0.225]), + ]) + + # Load dataset + train_dataset = datasets.ImageFolder(os.path.join(data_dir, "train"), transform=train_transform) + valid_dataset = datasets.ImageFolder(os.path.join(data_dir, "valid"), transform=valid_transform) + + # Hitung distribusi class untuk WeightedRandomSampler + class_counts = Counter([label for _, label in train_dataset.samples]) + class_weights = {cls: 1.0 / count for cls, count in class_counts.items()} + sample_weights = [class_weights[label] for _, label in train_dataset.samples] + + sampler = WeightedRandomSampler(weights=sample_weights, num_samples=len(sample_weights), replacement=True) + + # DataLoader + train_loader = DataLoader(train_dataset, batch_size=batch_size, sampler=sampler, + num_workers=num_workers, pin_memory=True) + valid_loader = DataLoader(valid_dataset, batch_size=batch_size, shuffle=False, + num_workers=num_workers, pin_memory=True) + + return train_loader, valid_loader, train_dataset.classes, train_dataset diff --git a/.history/src/dataset_20251001110415.py b/.history/src/dataset_20251001110415.py new file mode 100644 index 0000000000000000000000000000000000000000..69e5a5a3d4b1f618453901006c648114cd4f3048 --- /dev/null +++ b/.history/src/dataset_20251001110415.py @@ -0,0 +1,50 @@ +import os +from torch.utils.data import DataLoader, WeightedRandomSampler +from torchvision import datasets, transforms +import numpy as np +from collections import Counter + +def get_dataloaders(data_dir, batch_size=32, image_size=224, num_workers=2): + # Augmentasi training + train_transform = transforms.Compose([ + transforms.RandomHorizontalFlip(), + transforms.RandomVerticalFlip(), + transforms.RandomRotation(degrees=(20)), + transforms.ColorJitter(brightness=0.3, 
contrast=0.3, saturation=0.3, hue=0.1), + transforms.RandomAffine(degrees=0, translate=(0.1, 0.1), shear=10), + transforms.RandomErasing(p=0.5, scale=(0.02, 0.15)), + transforms.RandomResizedCrop(image_size, scale=(0.8, 1.0)), + transforms.ToTensor(), + transforms.Normalize(mean=[0.485, 0.456, 0.406], + std=[0.229, 0.224, 0.225]), + ]) + + # Augmentasi validasi lebih ringan + valid_transform = transforms.Compose([ + transforms.ColorJitter(brightness=0.3, contrast=0.3, saturation=0.3, hue=0.1), + transforms.RandomAffine(degrees=0, translate=(0.1, 0.1), shear=10), + transforms.RandomErasing(p=0.5, scale=(0.02, 0.15)), + transforms.Resize((image_size, image_size)), + transforms.ToTensor(), + transforms.Normalize(mean=[0.485, 0.456, 0.406], + std=[0.229, 0.224, 0.225]), + ]) + + # Load dataset + train_dataset = datasets.ImageFolder(os.path.join(data_dir, "train"), transform=train_transform) + valid_dataset = datasets.ImageFolder(os.path.join(data_dir, "valid"), transform=valid_transform) + + # Hitung distribusi class untuk WeightedRandomSampler + class_counts = Counter([label for _, label in train_dataset.samples]) + class_weights = {cls: 1.0 / count for cls, count in class_counts.items()} + sample_weights = [class_weights[label] for _, label in train_dataset.samples] + + sampler = WeightedRandomSampler(weights=sample_weights, num_samples=len(sample_weights), replacement=True) + + # DataLoader + train_loader = DataLoader(train_dataset, batch_size=batch_size, sampler=sampler, + num_workers=num_workers, pin_memory=True) + valid_loader = DataLoader(valid_dataset, batch_size=batch_size, shuffle=False, + num_workers=num_workers, pin_memory=True) + + return train_loader, valid_loader, train_dataset.classes, train_dataset diff --git a/.history/src/dataset_20251001111950.py b/.history/src/dataset_20251001111950.py new file mode 100644 index 0000000000000000000000000000000000000000..ca7913d9671149531d4f6a0ffb18c4e78a0afed7 --- /dev/null +++ 
b/.history/src/dataset_20251001111950.py @@ -0,0 +1,48 @@ +import os +from torch.utils.data import DataLoader, WeightedRandomSampler +from torchvision import datasets, transforms +import numpy as np +from collections import Counter + +def get_dataloaders(data_dir, batch_size=32, image_size=224, num_workers=2): + # Augmentasi training + train_transform = transforms.Compose([ + transforms.RandomHorizontalFlip(), + transforms.RandomVerticalFlip(), + transforms.RandomRotation(degrees=(20)), + transforms.ColorJitter(brightness=0.3, contrast=0.3, saturation=0.3, hue=0.1), + transforms.RandomAffine(degrees=0, translate=(0.1, 0.1), shear=10), + transforms.RandomErasing(p=0.5, scale=(0.02, 0.15)), + transforms.RandomResizedCrop(image_size, scale=(0.8, 1.0)), + transforms.ToTensor(), + transforms.Normalize(mean=[0.485, 0.456, 0.406], + std=[0.229, 0.224, 0.225]), + ]) + + # Augmentasi validasi lebih ringan + valid_transform = transforms.Compose([ + transforms.ColorJitter(brightness=0.3, contrast=0.3, saturation=0.3, hue=0.1), + transforms.Resize((image_size, image_size)), + transforms.ToTensor(), + transforms.Normalize(mean=[0.485, 0.456, 0.406], + std=[0.229, 0.224, 0.225]), + ]) + + # Load dataset + train_dataset = datasets.ImageFolder(os.path.join(data_dir, "train"), transform=train_transform) + valid_dataset = datasets.ImageFolder(os.path.join(data_dir, "valid"), transform=valid_transform) + + # Hitung distribusi class untuk WeightedRandomSampler + class_counts = Counter([label for _, label in train_dataset.samples]) + class_weights = {cls: 1.0 / count for cls, count in class_counts.items()} + sample_weights = [class_weights[label] for _, label in train_dataset.samples] + + sampler = WeightedRandomSampler(weights=sample_weights, num_samples=len(sample_weights), replacement=True) + + # DataLoader + train_loader = DataLoader(train_dataset, batch_size=batch_size, sampler=sampler, + num_workers=num_workers, pin_memory=True) + valid_loader = DataLoader(valid_dataset, 
batch_size=batch_size, shuffle=False, + num_workers=num_workers, pin_memory=True) + + return train_loader, valid_loader, train_dataset.classes, train_dataset diff --git a/.history/src/dataset_20251001111955.py b/.history/src/dataset_20251001111955.py new file mode 100644 index 0000000000000000000000000000000000000000..10b66d5fa1f4ee1941096a9aaf7fc4dac7050d56 --- /dev/null +++ b/.history/src/dataset_20251001111955.py @@ -0,0 +1,47 @@ +import os +from torch.utils.data import DataLoader, WeightedRandomSampler +from torchvision import datasets, transforms +import numpy as np +from collections import Counter + +def get_dataloaders(data_dir, batch_size=32, image_size=224, num_workers=2): + # Augmentasi training + train_transform = transforms.Compose([ + transforms.RandomHorizontalFlip(), + transforms.RandomVerticalFlip(), + transforms.RandomRotation(degrees=(20)), + transforms.ColorJitter(brightness=0.3, contrast=0.3, saturation=0.3, hue=0.1), + transforms.RandomAffine(degrees=0, translate=(0.1, 0.1), shear=10), + transforms.RandomErasing(p=0.5, scale=(0.02, 0.15)), + transforms.RandomResizedCrop(image_size, scale=(0.8, 1.0)), + transforms.ToTensor(), + transforms.Normalize(mean=[0.485, 0.456, 0.406], + std=[0.229, 0.224, 0.225]), + ]) + + # Augmentasi validasi lebih ringan + valid_transform = transforms.Compose([ + transforms.Resize((image_size, image_size)), + transforms.ToTensor(), + transforms.Normalize(mean=[0.485, 0.456, 0.406], + std=[0.229, 0.224, 0.225]), + ]) + + # Load dataset + train_dataset = datasets.ImageFolder(os.path.join(data_dir, "train"), transform=train_transform) + valid_dataset = datasets.ImageFolder(os.path.join(data_dir, "valid"), transform=valid_transform) + + # Hitung distribusi class untuk WeightedRandomSampler + class_counts = Counter([label for _, label in train_dataset.samples]) + class_weights = {cls: 1.0 / count for cls, count in class_counts.items()} + sample_weights = [class_weights[label] for _, label in train_dataset.samples] + + 
sampler = WeightedRandomSampler(weights=sample_weights, num_samples=len(sample_weights), replacement=True) + + # DataLoader + train_loader = DataLoader(train_dataset, batch_size=batch_size, sampler=sampler, + num_workers=num_workers, pin_memory=True) + valid_loader = DataLoader(valid_dataset, batch_size=batch_size, shuffle=False, + num_workers=num_workers, pin_memory=True) + + return train_loader, valid_loader, train_dataset.classes, train_dataset diff --git a/.history/src/dataset_20251001112009.py b/.history/src/dataset_20251001112009.py new file mode 100644 index 0000000000000000000000000000000000000000..65f588e9172ea163b1b01950be811b3cab054b4d --- /dev/null +++ b/.history/src/dataset_20251001112009.py @@ -0,0 +1,45 @@ +import os +from torch.utils.data import DataLoader, WeightedRandomSampler +from torchvision import datasets, transforms +import numpy as np +from collections import Counter + +def get_dataloaders(data_dir, batch_size=32, image_size=224, num_workers=2): + # Augmentasi training + train_transform = transforms.Compose([ + transforms.RandomResizedCrop(224), + transforms.RandomHorizontalFlip(), + transforms.RandomRotation(20), + transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1), + transforms.RandomAffine(degrees=0, translate=(0.1, 0.1)), + transforms.ToTensor(), # <-- WAJIB sebelum Normalize + transforms.Normalize([0.485, 0.456, 0.406], + [0.229, 0.224, 0.225]) + ]) + + # Augmentasi validasi lebih ringan + valid_transform = transforms.Compose([ + transforms.Resize((image_size, image_size)), + transforms.ToTensor(), + transforms.Normalize(mean=[0.485, 0.456, 0.406], + std=[0.229, 0.224, 0.225]), + ]) + + # Load dataset + train_dataset = datasets.ImageFolder(os.path.join(data_dir, "train"), transform=train_transform) + valid_dataset = datasets.ImageFolder(os.path.join(data_dir, "valid"), transform=valid_transform) + + # Hitung distribusi class untuk WeightedRandomSampler + class_counts = Counter([label for _, label in 
train_dataset.samples]) + class_weights = {cls: 1.0 / count for cls, count in class_counts.items()} + sample_weights = [class_weights[label] for _, label in train_dataset.samples] + + sampler = WeightedRandomSampler(weights=sample_weights, num_samples=len(sample_weights), replacement=True) + + # DataLoader + train_loader = DataLoader(train_dataset, batch_size=batch_size, sampler=sampler, + num_workers=num_workers, pin_memory=True) + valid_loader = DataLoader(valid_dataset, batch_size=batch_size, shuffle=False, + num_workers=num_workers, pin_memory=True) + + return train_loader, valid_loader, train_dataset.classes, train_dataset diff --git a/.history/src/dataset_20251001112013.py b/.history/src/dataset_20251001112013.py new file mode 100644 index 0000000000000000000000000000000000000000..c6cb50d5dbd7b69dc931a388930430d79d479c3b --- /dev/null +++ b/.history/src/dataset_20251001112013.py @@ -0,0 +1,45 @@ +import os +from torch.utils.data import DataLoader, WeightedRandomSampler +from torchvision import datasets, transforms +import numpy as np +from collections import Counter + +def get_dataloaders(data_dir, batch_size=32, image_size=224, num_workers=2): + # Augmentasi training + train_transform = transforms.Compose([ + transforms.RandomResizedCrop(224), + transforms.RandomHorizontalFlip(), + transforms.RandomRotation(20), + transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1), + transforms.RandomAffine(degrees=0, translate=(0.1, 0.1)), + transforms.ToTensor(), # <-- WAJIB sebelum Normalize + transforms.Normalize([0.485, 0.456, 0.406], + [0.229, 0.224, 0.225]) + ]) + + # Augmentasi validasi lebih ringan + valid_transform = transforms.Compose([ + transforms.Resize((image_size, image_size)), + transforms.ToTensor(), + transforms.Normalize(mean=[0.485, 0.456, 0.406], + std=[0.229, 0.224, 0.225]), + ]) + + # Load dataset + train_dataset = datasets.ImageFolder(os.path.join(data_dir, "train"), transform=train_transform) + valid_dataset = 
datasets.ImageFolder(os.path.join(data_dir, "valid"), transform=valid_transform) + + # Hitung distribusi class untuk WeightedRandomSampler + class_counts = Counter([label for _, label in train_dataset.samples]) + class_weights = {cls: 1.0 / count for cls, count in class_counts.items()} + sample_weights = [class_weights[label] for _, label in train_dataset.samples] + + sampler = WeightedRandomSampler(weights=sample_weights, num_samples=len(sample_weights), replacement=True) + + # DataLoader + train_loader = DataLoader(train_dataset, batch_size=batch_size, sampler=sampler, + num_workers=num_workers, pin_memory=True) + valid_loader = DataLoader(valid_dataset, batch_size=batch_size, shuffle=False, + num_workers=num_workers, pin_memory=True) + + return train_loader, valid_loader, train_dataset.classes, train_dataset diff --git a/.history/src/dataset_20251001112015.py b/.history/src/dataset_20251001112015.py new file mode 100644 index 0000000000000000000000000000000000000000..31af9e0301955989df9a4a4562ff1b63fd1235b5 --- /dev/null +++ b/.history/src/dataset_20251001112015.py @@ -0,0 +1,45 @@ +import os +from torch.utils.data import DataLoader, WeightedRandomSampler +from torchvision import datasets, transforms +import numpy as np +from collections import Counter + +def get_dataloaders(data_dir, batch_size=32, image_size=224, num_workers=2): + # Augmentasi training + train_transform = transforms.Compose([ + transforms.RandomResizedCrop(224), + transforms.RandomHorizontalFlip(), + transforms.RandomRotation(20), + transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1), + transforms.RandomAffine(degrees=0, translate=(0.1, 0.1)), + transforms.ToTensor(), # <-- WAJIB sebelum Normalize + transforms.Normalize([0.485, 0.456, 0.406], + [0.229, 0.224, 0.225]) + ]) + + # Augmentasi validasi lebih ringan + valid_transform = transforms.Compose([ + transforms.Resize((image_size, image_size)), + transforms.ToTensor(), + transforms.Normalize(mean=[0.485, 0.456, 
0.406], + std=[0.229, 0.224, 0.225]), + ]) + + # Load dataset + train_dataset = datasets.ImageFolder(os.path.join(data_dir, "train"), transform=train_transform) + valid_dataset = datasets.ImageFolder(os.path.join(data_dir, "valid"), transform=valid_transform) + + # Hitung distribusi class untuk WeightedRandomSampler + class_counts = Counter([label for _, label in train_dataset.samples]) + class_weights = {cls: 1.0 / count for cls, count in class_counts.items()} + sample_weights = [class_weights[label] for _, label in train_dataset.samples] + + sampler = WeightedRandomSampler(weights=sample_weights, num_samples=len(sample_weights), replacement=True) + + # DataLoader + train_loader = DataLoader(train_dataset, batch_size=batch_size, sampler=sampler, + num_workers=num_workers, pin_memory=True) + valid_loader = DataLoader(valid_dataset, batch_size=batch_size, shuffle=False, + num_workers=num_workers, pin_memory=True) + + return train_loader, valid_loader, train_dataset.classes, train_dataset diff --git a/.history/src/dataset_20251001112016.py b/.history/src/dataset_20251001112016.py new file mode 100644 index 0000000000000000000000000000000000000000..14d6e33a94f62986c75e8d98031ade70ca4ec907 --- /dev/null +++ b/.history/src/dataset_20251001112016.py @@ -0,0 +1,45 @@ +import os +from torch.utils.data import DataLoader, WeightedRandomSampler +from torchvision import datasets, transforms +import numpy as np +from collections import Counter + +def get_dataloaders(data_dir, batch_size=32, image_size=224, num_workers=2): + # Augmentasi training + train_transform = transforms.Compose([ + transforms.RandomResizedCrop(224), + transforms.RandomHorizontalFlip(), + transforms.RandomRotation(20), + transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1), + transforms.RandomAffine(degrees=0, translate=(0.1, 0.1)), + transforms.ToTensor(), # <-- WAJIB sebelum Normalize + transforms.Normalize([0.485, 0.456, 0.406], + [0.229, 0.224, 0.225]) + ]) + + # Augmentasi 
validasi lebih ringan + valid_transform = transforms.Compose([ + transforms.Resize((image_size, image_size)), + transforms.ToTensor(), + transforms.Normalize(mean=[0.485, 0.456, 0.406], + std=[0.229, 0.224, 0.225]), + ]) + + # Load dataset + train_dataset = datasets.ImageFolder(os.path.join(data_dir, "train"), transform=train_transform) + valid_dataset = datasets.ImageFolder(os.path.join(data_dir, "valid"), transform=valid_transform) + + # Hitung distribusi class untuk WeightedRandomSampler + class_counts = Counter([label for _, label in train_dataset.samples]) + class_weights = {cls: 1.0 / count for cls, count in class_counts.items()} + sample_weights = [class_weights[label] for _, label in train_dataset.samples] + + sampler = WeightedRandomSampler(weights=sample_weights, num_samples=len(sample_weights), replacement=True) + + # DataLoader + train_loader = DataLoader(train_dataset, batch_size=batch_size, sampler=sampler, + num_workers=num_workers, pin_memory=True) + valid_loader = DataLoader(valid_dataset, batch_size=batch_size, shuffle=False, + num_workers=num_workers, pin_memory=True) + + return train_loader, valid_loader, train_dataset.classes, train_dataset diff --git a/.history/src/dataset_20251001112017.py b/.history/src/dataset_20251001112017.py new file mode 100644 index 0000000000000000000000000000000000000000..39ed589edb553f9c720b715fe4141e2ccc0f8a6f --- /dev/null +++ b/.history/src/dataset_20251001112017.py @@ -0,0 +1,45 @@ +import os +from torch.utils.data import DataLoader, WeightedRandomSampler +from torchvision import datasets, transforms +import numpy as np +from collections import Counter + +def get_dataloaders(data_dir, batch_size=32, image_size=224, num_workers=2): + # Augmentasi training + train_transform = transforms.Compose([ + transforms.RandomResizedCrop(224), + transforms.RandomHorizontalFlip(), + transforms.RandomRotation(20), + transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1), + 
transforms.RandomAffine(degrees=0, translate=(0.1, 0.1)), + transforms.ToTensor(), # <-- WAJIB sebelum Normalize + transforms.Normalize([0.485, 0.456, 0.406], + [0.229, 0.224, 0.225]) + ]) + + # Augmentasi validasi lebih ringan + valid_transform = transforms.Compose([ + transforms.Resize((image_size, image_size)), + transforms.ToTensor(), + transforms.Normalize(mean=[0.485, 0.456, 0.406], + std=[0.229, 0.224, 0.225]), + ]) + + # Load dataset + train_dataset = datasets.ImageFolder(os.path.join(data_dir, "train"), transform=train_transform) + valid_dataset = datasets.ImageFolder(os.path.join(data_dir, "valid"), transform=valid_transform) + + # Hitung distribusi class untuk WeightedRandomSampler + class_counts = Counter([label for _, label in train_dataset.samples]) + class_weights = {cls: 1.0 / count for cls, count in class_counts.items()} + sample_weights = [class_weights[label] for _, label in train_dataset.samples] + + sampler = WeightedRandomSampler(weights=sample_weights, num_samples=len(sample_weights), replacement=True) + + # DataLoader + train_loader = DataLoader(train_dataset, batch_size=batch_size, sampler=sampler, + num_workers=num_workers, pin_memory=True) + valid_loader = DataLoader(valid_dataset, batch_size=batch_size, shuffle=False, + num_workers=num_workers, pin_memory=True) + + return train_loader, valid_loader, train_dataset.classes, train_dataset diff --git a/.history/src/dataset_20251001112019.py b/.history/src/dataset_20251001112019.py new file mode 100644 index 0000000000000000000000000000000000000000..c67172ed978cd730979e440cf29bc387700f8251 --- /dev/null +++ b/.history/src/dataset_20251001112019.py @@ -0,0 +1,45 @@ +import os +from torch.utils.data import DataLoader, WeightedRandomSampler +from torchvision import datasets, transforms +import numpy as np +from collections import Counter + +def get_dataloaders(data_dir, batch_size=32, image_size=224, num_workers=2): + # Augmentasi training + train_transform = transforms.Compose([ + 
transforms.RandomResizedCrop(224), + transforms.RandomHorizontalFlip(), + transforms.RandomRotation(20), + transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1), + transforms.RandomAffine(degrees=0, translate=(0.1, 0.1)), + transforms.ToTensor(), # <-- WAJIB sebelum Normalize + transforms.Normalize([0.485, 0.456, 0.406], + [0.229, 0.224, 0.225]) + ]) + + # Augmentasi validasi lebih ringan + valid_transform = transforms.Compose([ + transforms.Resize((image_size, image_size)), + transforms.ToTensor(), + transforms.Normalize(mean=[0.485, 0.456, 0.406], + std=[0.229, 0.224, 0.225]), + ]) + + # Load dataset + train_dataset = datasets.ImageFolder(os.path.join(data_dir, "train"), transform=train_transform) + valid_dataset = datasets.ImageFolder(os.path.join(data_dir, "valid"), transform=valid_transform) + + # Hitung distribusi class untuk WeightedRandomSampler + class_counts = Counter([label for _, label in train_dataset.samples]) + class_weights = {cls: 1.0 / count for cls, count in class_counts.items()} + sample_weights = [class_weights[label] for _, label in train_dataset.samples] + + sampler = WeightedRandomSampler(weights=sample_weights, num_samples=len(sample_weights), replacement=True) + + # DataLoader + train_loader = DataLoader(train_dataset, batch_size=batch_size, sampler=sampler, + num_workers=num_workers, pin_memory=True) + valid_loader = DataLoader(valid_dataset, batch_size=batch_size, shuffle=False, + num_workers=num_workers, pin_memory=True) + + return train_loader, valid_loader, train_dataset.classes, train_dataset diff --git a/.history/src/dataset_20251001112020.py b/.history/src/dataset_20251001112020.py new file mode 100644 index 0000000000000000000000000000000000000000..8e764e616fd3f1ed1a5e6b46a56b1755c6838ee8 --- /dev/null +++ b/.history/src/dataset_20251001112020.py @@ -0,0 +1,45 @@ +import os +from torch.utils.data import DataLoader, WeightedRandomSampler +from torchvision import datasets, transforms +import numpy as np +from 
collections import Counter + +def get_dataloaders(data_dir, batch_size=32, image_size=224, num_workers=2): + # Augmentasi training + train_transform = transforms.Compose([ + transforms.RandomResizedCrop(224), + transforms.RandomHorizontalFlip(), + transforms.RandomRotation(20), + transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1), + transforms.RandomAffine(degrees=0, translate=(0.1, 0.1)), + transforms.ToTensor(), # <-- WAJIB sebelum Normalize + transforms.Normalize([0.485, 0.456, 0.406], + [0.229, 0.224, 0.225]) + ]) + + # Augmentasi validasi lebih ringan + valid_transform = transforms.Compose([ + transforms.Resize((image_size, image_size)), + transforms.ToTensor(), + transforms.Normalize(mean=[0.485, 0.456, 0.406], + std=[0.229, 0.224, 0.225]), + ]) + + # Load dataset + train_dataset = datasets.ImageFolder(os.path.join(data_dir, "train"), transform=train_transform) + valid_dataset = datasets.ImageFolder(os.path.join(data_dir, "valid"), transform=valid_transform) + + # Hitung distribusi class untuk WeightedRandomSampler + class_counts = Counter([label for _, label in train_dataset.samples]) + class_weights = {cls: 1.0 / count for cls, count in class_counts.items()} + sample_weights = [class_weights[label] for _, label in train_dataset.samples] + + sampler = WeightedRandomSampler(weights=sample_weights, num_samples=len(sample_weights), replacement=True) + + # DataLoader + train_loader = DataLoader(train_dataset, batch_size=batch_size, sampler=sampler, + num_workers=num_workers, pin_memory=True) + valid_loader = DataLoader(valid_dataset, batch_size=batch_size, shuffle=False, + num_workers=num_workers, pin_memory=True) + + return train_loader, valid_loader, train_dataset.classes, train_dataset diff --git a/.history/src/dataset_20251001112021.py b/.history/src/dataset_20251001112021.py new file mode 100644 index 0000000000000000000000000000000000000000..7a5d839c395813b82ea7a5ef69c9d31def4e40cc --- /dev/null +++ 
b/.history/src/dataset_20251001112021.py @@ -0,0 +1,45 @@ +import os +from torch.utils.data import DataLoader, WeightedRandomSampler +from torchvision import datasets, transforms +import numpy as np +from collections import Counter + +def get_dataloaders(data_dir, batch_size=32, image_size=224, num_workers=2): + # Augmentasi training + train_transform = transforms.Compose([ + transforms.RandomResizedCrop(224), + transforms.RandomHorizontalFlip(), + transforms.RandomRotation(20), + transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1), + transforms.RandomAffine(degrees=0, translate=(0.1, 0.1)), + transforms.ToTensor(), # <-- WAJIB sebelum Normalize + transforms.Normalize([0.485, 0.456, 0.406], + [0.229, 0.224, 0.225]) + ]) + + # Augmentasi validasi lebih ringan + valid_transform = transforms.Compose([ + transforms.Resize((image_size, image_size)), + transforms.ToTensor(), + transforms.Normalize(mean=[0.485, 0.456, 0.406], + std=[0.229, 0.224, 0.225]), + ]) + + # Load dataset + train_dataset = datasets.ImageFolder(os.path.join(data_dir, "train"), transform=train_transform) + valid_dataset = datasets.ImageFolder(os.path.join(data_dir, "valid"), transform=valid_transform) + + # Hitung distribusi class untuk WeightedRandomSampler + class_counts = Counter([label for _, label in train_dataset.samples]) + class_weights = {cls: 1.0 / count for cls, count in class_counts.items()} + sample_weights = [class_weights[label] for _, label in train_dataset.samples] + + sampler = WeightedRandomSampler(weights=sample_weights, num_samples=len(sample_weights), replacement=True) + + # DataLoader + train_loader = DataLoader(train_dataset, batch_size=batch_size, sampler=sampler, + num_workers=num_workers, pin_memory=True) + valid_loader = DataLoader(valid_dataset, batch_size=batch_size, shuffle=False, + num_workers=num_workers, pin_memory=True) + + return train_loader, valid_loader, train_dataset.classes, train_dataset diff --git 
a/.history/src/dataset_20251001112024.py b/.history/src/dataset_20251001112024.py new file mode 100644 index 0000000000000000000000000000000000000000..f0ead36fc8f4af2ccc60b5b8afe18e24241efbd1 --- /dev/null +++ b/.history/src/dataset_20251001112024.py @@ -0,0 +1,45 @@ +import os +from torch.utils.data import DataLoader, WeightedRandomSampler +from torchvision import datasets, transforms +import numpy as np +from collections import Counter + +def get_dataloaders(data_dir, batch_size=32, image_size=224, num_workers=2): + # Augmentasi training + train_transform = transforms.Compose([ + transforms.RandomResizedCrop(224), + transforms.RandomHorizontalFlip(), + transforms.RandomRotation(20), + transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1), + transforms.RandomAffine(degrees=0, translate=(0.1, 0.1)), + transforms.ToTensor(), # <-- WAJIB sebelum Normalize + transforms.Normalize([0.485, 0.456, 0.406], + [0.229, 0.224, 0.225]) + ]) + + # Augmentasi validasi lebih ringan + valid_transform = transforms.Compose([ + transforms.Resize((image_size, image_size)), + transforms.ToTensor(), + transforms.Normalize(mean=[0.485, 0.456, 0.406], + std=[0.229, 0.224, 0.225]), + ]) + + # Load dataset + train_dataset = datasets.ImageFolder(os.path.join(data_dir, "train"), transform=train_transform) + valid_dataset = datasets.ImageFolder(os.path.join(data_dir, "valid"), transform=valid_transform) + + # Hitung distribusi class untuk WeightedRandomSampler + class_counts = Counter([label for _, label in train_dataset.samples]) + class_weights = {cls: 1.0 / count for cls, count in class_counts.items()} + sample_weights = [class_weights[label] for _, label in train_dataset.samples] + + sampler = WeightedRandomSampler(weights=sample_weights, num_samples=len(sample_weights), replacement=True) + + # DataLoader + train_loader = DataLoader(train_dataset, batch_size=batch_size, sampler=sampler, + num_workers=num_workers, pin_memory=True) + valid_loader = 
DataLoader(valid_dataset, batch_size=batch_size, shuffle=False, + num_workers=num_workers, pin_memory=True) + + return train_loader, valid_loader, train_dataset.classes, train_dataset diff --git a/.history/src/dataset_20251001112025.py b/.history/src/dataset_20251001112025.py new file mode 100644 index 0000000000000000000000000000000000000000..f0ead36fc8f4af2ccc60b5b8afe18e24241efbd1 --- /dev/null +++ b/.history/src/dataset_20251001112025.py @@ -0,0 +1,45 @@ +import os +from torch.utils.data import DataLoader, WeightedRandomSampler +from torchvision import datasets, transforms +import numpy as np +from collections import Counter + +def get_dataloaders(data_dir, batch_size=32, image_size=224, num_workers=2): + # Augmentasi training + train_transform = transforms.Compose([ + transforms.RandomResizedCrop(224), + transforms.RandomHorizontalFlip(), + transforms.RandomRotation(20), + transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1), + transforms.RandomAffine(degrees=0, translate=(0.1, 0.1)), + transforms.ToTensor(), # <-- WAJIB sebelum Normalize + transforms.Normalize([0.485, 0.456, 0.406], + [0.229, 0.224, 0.225]) + ]) + + # Augmentasi validasi lebih ringan + valid_transform = transforms.Compose([ + transforms.Resize((image_size, image_size)), + transforms.ToTensor(), + transforms.Normalize(mean=[0.485, 0.456, 0.406], + std=[0.229, 0.224, 0.225]), + ]) + + # Load dataset + train_dataset = datasets.ImageFolder(os.path.join(data_dir, "train"), transform=train_transform) + valid_dataset = datasets.ImageFolder(os.path.join(data_dir, "valid"), transform=valid_transform) + + # Hitung distribusi class untuk WeightedRandomSampler + class_counts = Counter([label for _, label in train_dataset.samples]) + class_weights = {cls: 1.0 / count for cls, count in class_counts.items()} + sample_weights = [class_weights[label] for _, label in train_dataset.samples] + + sampler = WeightedRandomSampler(weights=sample_weights, num_samples=len(sample_weights), 
replacement=True) + + # DataLoader + train_loader = DataLoader(train_dataset, batch_size=batch_size, sampler=sampler, + num_workers=num_workers, pin_memory=True) + valid_loader = DataLoader(valid_dataset, batch_size=batch_size, shuffle=False, + num_workers=num_workers, pin_memory=True) + + return train_loader, valid_loader, train_dataset.classes, train_dataset diff --git a/.history/src/dataset_20251001120419.py b/.history/src/dataset_20251001120419.py new file mode 100644 index 0000000000000000000000000000000000000000..10b66d5fa1f4ee1941096a9aaf7fc4dac7050d56 --- /dev/null +++ b/.history/src/dataset_20251001120419.py @@ -0,0 +1,47 @@ +import os +from torch.utils.data import DataLoader, WeightedRandomSampler +from torchvision import datasets, transforms +import numpy as np +from collections import Counter + +def get_dataloaders(data_dir, batch_size=32, image_size=224, num_workers=2): + # Augmentasi training + train_transform = transforms.Compose([ + transforms.RandomHorizontalFlip(), + transforms.RandomVerticalFlip(), + transforms.RandomRotation(degrees=(20)), + transforms.ColorJitter(brightness=0.3, contrast=0.3, saturation=0.3, hue=0.1), + transforms.RandomAffine(degrees=0, translate=(0.1, 0.1), shear=10), + transforms.RandomErasing(p=0.5, scale=(0.02, 0.15)), + transforms.RandomResizedCrop(image_size, scale=(0.8, 1.0)), + transforms.ToTensor(), + transforms.Normalize(mean=[0.485, 0.456, 0.406], + std=[0.229, 0.224, 0.225]), + ]) + + # Augmentasi validasi lebih ringan + valid_transform = transforms.Compose([ + transforms.Resize((image_size, image_size)), + transforms.ToTensor(), + transforms.Normalize(mean=[0.485, 0.456, 0.406], + std=[0.229, 0.224, 0.225]), + ]) + + # Load dataset + train_dataset = datasets.ImageFolder(os.path.join(data_dir, "train"), transform=train_transform) + valid_dataset = datasets.ImageFolder(os.path.join(data_dir, "valid"), transform=valid_transform) + + # Hitung distribusi class untuk WeightedRandomSampler + class_counts = Counter([label 
for _, label in train_dataset.samples]) + class_weights = {cls: 1.0 / count for cls, count in class_counts.items()} + sample_weights = [class_weights[label] for _, label in train_dataset.samples] + + sampler = WeightedRandomSampler(weights=sample_weights, num_samples=len(sample_weights), replacement=True) + + # DataLoader + train_loader = DataLoader(train_dataset, batch_size=batch_size, sampler=sampler, + num_workers=num_workers, pin_memory=True) + valid_loader = DataLoader(valid_dataset, batch_size=batch_size, shuffle=False, + num_workers=num_workers, pin_memory=True) + + return train_loader, valid_loader, train_dataset.classes, train_dataset diff --git a/.history/src/dataset_20251001120426.py b/.history/src/dataset_20251001120426.py new file mode 100644 index 0000000000000000000000000000000000000000..aee9e41ba9e337f61c9a3d7429c5e979ff51cc03 --- /dev/null +++ b/.history/src/dataset_20251001120426.py @@ -0,0 +1,45 @@ +import os +from torch.utils.data import DataLoader, WeightedRandomSampler +from torchvision import datasets, transforms +import numpy as np +from collections import Counter + +def get_dataloaders(data_dir, batch_size=32, image_size=224, num_workers=2): + # Augmentasi training + train_transform = transforms.Compose([ + transforms.RandomHorizontalFlip(), + transforms.RandomVerticalFlip(), + transforms.RandomRotation(degrees=(20)), + transforms.ColorJitter(brightness=0.3, contrast=0.3, saturation=0.2, hue=0.1), + transforms.RandomResizedCrop(image_size, scale=(0.8, 1.0)), + transforms.ToTensor(), + transforms.Normalize(mean=[0.485, 0.456, 0.406], + std=[0.229, 0.224, 0.225]), + ]) + + # Augmentasi validasi lebih ringan + valid_transform = transforms.Compose([ + transforms.Resize((image_size, image_size)), + transforms.ToTensor(), + transforms.Normalize(mean=[0.485, 0.456, 0.406], + std=[0.229, 0.224, 0.225]), + ]) + + # Load dataset + train_dataset = datasets.ImageFolder(os.path.join(data_dir, "train"), transform=train_transform) + valid_dataset = 
datasets.ImageFolder(os.path.join(data_dir, "valid"), transform=valid_transform) + + # Hitung distribusi class untuk WeightedRandomSampler + class_counts = Counter([label for _, label in train_dataset.samples]) + class_weights = {cls: 1.0 / count for cls, count in class_counts.items()} + sample_weights = [class_weights[label] for _, label in train_dataset.samples] + + sampler = WeightedRandomSampler(weights=sample_weights, num_samples=len(sample_weights), replacement=True) + + # DataLoader + train_loader = DataLoader(train_dataset, batch_size=batch_size, sampler=sampler, + num_workers=num_workers, pin_memory=True) + valid_loader = DataLoader(valid_dataset, batch_size=batch_size, shuffle=False, + num_workers=num_workers, pin_memory=True) + + return train_loader, valid_loader, train_dataset.classes, train_dataset diff --git a/.history/src/dataset_20251001120429.py b/.history/src/dataset_20251001120429.py new file mode 100644 index 0000000000000000000000000000000000000000..a42bd83fd1fcd87d486c232a411dab5a53b58614 --- /dev/null +++ b/.history/src/dataset_20251001120429.py @@ -0,0 +1,45 @@ +import os +from torch.utils.data import DataLoader, WeightedRandomSampler +from torchvision import datasets, transforms +import numpy as np +from collections import Counter + +def get_dataloaders(data_dir, batch_size=32, image_size=224, num_workers=2): + # Augmentasi training + train_transform = transforms.Compose([ + transforms.RandomHorizontalFlip(), + transforms.RandomVerticalFlip(), + transforms.RandomRotation(degrees=(20)), + transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1), + transforms.RandomResizedCrop(image_size, scale=(0.8, 1.0)), + transforms.ToTensor(), + transforms.Normalize(mean=[0.485, 0.456, 0.406], + std=[0.229, 0.224, 0.225]), + ]) + + # Augmentasi validasi lebih ringan + valid_transform = transforms.Compose([ + transforms.Resize((image_size, image_size)), + transforms.ToTensor(), + transforms.Normalize(mean=[0.485, 0.456, 0.406], + 
std=[0.229, 0.224, 0.225]), + ]) + + # Load dataset + train_dataset = datasets.ImageFolder(os.path.join(data_dir, "train"), transform=train_transform) + valid_dataset = datasets.ImageFolder(os.path.join(data_dir, "valid"), transform=valid_transform) + + # Hitung distribusi class untuk WeightedRandomSampler + class_counts = Counter([label for _, label in train_dataset.samples]) + class_weights = {cls: 1.0 / count for cls, count in class_counts.items()} + sample_weights = [class_weights[label] for _, label in train_dataset.samples] + + sampler = WeightedRandomSampler(weights=sample_weights, num_samples=len(sample_weights), replacement=True) + + # DataLoader + train_loader = DataLoader(train_dataset, batch_size=batch_size, sampler=sampler, + num_workers=num_workers, pin_memory=True) + valid_loader = DataLoader(valid_dataset, batch_size=batch_size, shuffle=False, + num_workers=num_workers, pin_memory=True) + + return train_loader, valid_loader, train_dataset.classes, train_dataset diff --git a/.history/src/dataset_20251002093405.py b/.history/src/dataset_20251002093405.py new file mode 100644 index 0000000000000000000000000000000000000000..63b1329c981517decf3aaa7035c8a0419683c348 --- /dev/null +++ b/.history/src/dataset_20251002093405.py @@ -0,0 +1,45 @@ +import os +from torch.utils.data import DataLoader, WeightedRandomSampler +from torchvision import datasets, transforms +import numpy as np +from collections import Counter + +def get_dataloaders(data_dir, batch_size=32, image_size=224, num_workers=2): + # Augmentasi training + train_transform = transforms.Compose([ + transforms.RandomHorizontalFlip(), + transforms.RandomVerticalFlip(), + transforms.RandomRotation(degrees=(45)), + transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1), + transforms.RandomResizedCrop(image_size, scale=(0.8, 1.0)), + transforms.ToTensor(), + transforms.Normalize(mean=[0.485, 0.456, 0.406], + std=[0.229, 0.224, 0.225]), + ]) + + # Augmentasi validasi lebih ringan + 
valid_transform = transforms.Compose([ + transforms.Resize((image_size, image_size)), + transforms.ToTensor(), + transforms.Normalize(mean=[0.485, 0.456, 0.406], + std=[0.229, 0.224, 0.225]), + ]) + + # Load dataset + train_dataset = datasets.ImageFolder(os.path.join(data_dir, "train"), transform=train_transform) + valid_dataset = datasets.ImageFolder(os.path.join(data_dir, "valid"), transform=valid_transform) + + # Hitung distribusi class untuk WeightedRandomSampler + class_counts = Counter([label for _, label in train_dataset.samples]) + class_weights = {cls: 1.0 / count for cls, count in class_counts.items()} + sample_weights = [class_weights[label] for _, label in train_dataset.samples] + + sampler = WeightedRandomSampler(weights=sample_weights, num_samples=len(sample_weights), replacement=True) + + # DataLoader + train_loader = DataLoader(train_dataset, batch_size=batch_size, sampler=sampler, + num_workers=num_workers, pin_memory=True) + valid_loader = DataLoader(valid_dataset, batch_size=batch_size, shuffle=False, + num_workers=num_workers, pin_memory=True) + + return train_loader, valid_loader, train_dataset.classes, train_dataset diff --git a/.history/src/dataset_20251002093410.py b/.history/src/dataset_20251002093410.py new file mode 100644 index 0000000000000000000000000000000000000000..11147765292b5cdae848eeca44fc3ecb810bf3dd --- /dev/null +++ b/.history/src/dataset_20251002093410.py @@ -0,0 +1,45 @@ +import os +from torch.utils.data import DataLoader, WeightedRandomSampler +from torchvision import datasets, transforms +import numpy as np +from collections import Counter + +def get_dataloaders(data_dir, batch_size=32, image_size=224, num_workers=2): + # Augmentasi training + train_transform = transforms.Compose([ + transforms.RandomHorizontalFlip(), + transforms.RandomVerticalFlip(), + transforms.RandomRotation(degrees=(45)), + transforms.ColorJitter(brightness=0.3, contrast=0.2, saturation=0.2, hue=0.1), + transforms.RandomResizedCrop(image_size, 
scale=(0.8, 1.0)), + transforms.ToTensor(), + transforms.Normalize(mean=[0.485, 0.456, 0.406], + std=[0.229, 0.224, 0.225]), + ]) + + # Augmentasi validasi lebih ringan + valid_transform = transforms.Compose([ + transforms.Resize((image_size, image_size)), + transforms.ToTensor(), + transforms.Normalize(mean=[0.485, 0.456, 0.406], + std=[0.229, 0.224, 0.225]), + ]) + + # Load dataset + train_dataset = datasets.ImageFolder(os.path.join(data_dir, "train"), transform=train_transform) + valid_dataset = datasets.ImageFolder(os.path.join(data_dir, "valid"), transform=valid_transform) + + # Hitung distribusi class untuk WeightedRandomSampler + class_counts = Counter([label for _, label in train_dataset.samples]) + class_weights = {cls: 1.0 / count for cls, count in class_counts.items()} + sample_weights = [class_weights[label] for _, label in train_dataset.samples] + + sampler = WeightedRandomSampler(weights=sample_weights, num_samples=len(sample_weights), replacement=True) + + # DataLoader + train_loader = DataLoader(train_dataset, batch_size=batch_size, sampler=sampler, + num_workers=num_workers, pin_memory=True) + valid_loader = DataLoader(valid_dataset, batch_size=batch_size, shuffle=False, + num_workers=num_workers, pin_memory=True) + + return train_loader, valid_loader, train_dataset.classes, train_dataset diff --git a/.history/src/dataset_20251002093412.py b/.history/src/dataset_20251002093412.py new file mode 100644 index 0000000000000000000000000000000000000000..790c6567995700111f79c99255ce8b5f1c19579c --- /dev/null +++ b/.history/src/dataset_20251002093412.py @@ -0,0 +1,45 @@ +import os +from torch.utils.data import DataLoader, WeightedRandomSampler +from torchvision import datasets, transforms +import numpy as np +from collections import Counter + +def get_dataloaders(data_dir, batch_size=32, image_size=224, num_workers=2): + # Augmentasi training + train_transform = transforms.Compose([ + transforms.RandomHorizontalFlip(), + transforms.RandomVerticalFlip(), + 
transforms.RandomRotation(degrees=(45)), + transforms.ColorJitter(brightness=0.3, contrast=0.3, saturation=0.2, hue=0.1), + transforms.RandomResizedCrop(image_size, scale=(0.8, 1.0)), + transforms.ToTensor(), + transforms.Normalize(mean=[0.485, 0.456, 0.406], + std=[0.229, 0.224, 0.225]), + ]) + + # Augmentasi validasi lebih ringan + valid_transform = transforms.Compose([ + transforms.Resize((image_size, image_size)), + transforms.ToTensor(), + transforms.Normalize(mean=[0.485, 0.456, 0.406], + std=[0.229, 0.224, 0.225]), + ]) + + # Load dataset + train_dataset = datasets.ImageFolder(os.path.join(data_dir, "train"), transform=train_transform) + valid_dataset = datasets.ImageFolder(os.path.join(data_dir, "valid"), transform=valid_transform) + + # Hitung distribusi class untuk WeightedRandomSampler + class_counts = Counter([label for _, label in train_dataset.samples]) + class_weights = {cls: 1.0 / count for cls, count in class_counts.items()} + sample_weights = [class_weights[label] for _, label in train_dataset.samples] + + sampler = WeightedRandomSampler(weights=sample_weights, num_samples=len(sample_weights), replacement=True) + + # DataLoader + train_loader = DataLoader(train_dataset, batch_size=batch_size, sampler=sampler, + num_workers=num_workers, pin_memory=True) + valid_loader = DataLoader(valid_dataset, batch_size=batch_size, shuffle=False, + num_workers=num_workers, pin_memory=True) + + return train_loader, valid_loader, train_dataset.classes, train_dataset diff --git a/.history/src/dataset_20251002093414.py b/.history/src/dataset_20251002093414.py new file mode 100644 index 0000000000000000000000000000000000000000..84afbf7391faa787c10044066967574c90f829a9 --- /dev/null +++ b/.history/src/dataset_20251002093414.py @@ -0,0 +1,45 @@ +import os +from torch.utils.data import DataLoader, WeightedRandomSampler +from torchvision import datasets, transforms +import numpy as np +from collections import Counter + +def get_dataloaders(data_dir, batch_size=32, 
image_size=224, num_workers=2): + # Augmentasi training + train_transform = transforms.Compose([ + transforms.RandomHorizontalFlip(), + transforms.RandomVerticalFlip(), + transforms.RandomRotation(degrees=(45)), + transforms.ColorJitter(brightness=0.3, contrast=0.3, saturation=0.3, hue=0.1), + transforms.RandomResizedCrop(image_size, scale=(0.8, 1.0)), + transforms.ToTensor(), + transforms.Normalize(mean=[0.485, 0.456, 0.406], + std=[0.229, 0.224, 0.225]), + ]) + + # Augmentasi validasi lebih ringan + valid_transform = transforms.Compose([ + transforms.Resize((image_size, image_size)), + transforms.ToTensor(), + transforms.Normalize(mean=[0.485, 0.456, 0.406], + std=[0.229, 0.224, 0.225]), + ]) + + # Load dataset + train_dataset = datasets.ImageFolder(os.path.join(data_dir, "train"), transform=train_transform) + valid_dataset = datasets.ImageFolder(os.path.join(data_dir, "valid"), transform=valid_transform) + + # Hitung distribusi class untuk WeightedRandomSampler + class_counts = Counter([label for _, label in train_dataset.samples]) + class_weights = {cls: 1.0 / count for cls, count in class_counts.items()} + sample_weights = [class_weights[label] for _, label in train_dataset.samples] + + sampler = WeightedRandomSampler(weights=sample_weights, num_samples=len(sample_weights), replacement=True) + + # DataLoader + train_loader = DataLoader(train_dataset, batch_size=batch_size, sampler=sampler, + num_workers=num_workers, pin_memory=True) + valid_loader = DataLoader(valid_dataset, batch_size=batch_size, shuffle=False, + num_workers=num_workers, pin_memory=True) + + return train_loader, valid_loader, train_dataset.classes, train_dataset diff --git a/.history/src/dataset_20251002104911.py b/.history/src/dataset_20251002104911.py new file mode 100644 index 0000000000000000000000000000000000000000..4b72f2d8b970cc9e23721158050f4bffebcc500a --- /dev/null +++ b/.history/src/dataset_20251002104911.py @@ -0,0 +1,45 @@ +import os +from torch.utils.data import DataLoader, 
WeightedRandomSampler +from torchvision import datasets, transforms +import numpy as np +from collections import Counter + +def get_dataloaders(data_dir, batch_size=32, image_size=224, num_workers=4): + # Augmentasi training + train_transform = transforms.Compose([ + transforms.RandomHorizontalFlip(), + transforms.RandomVerticalFlip(), + transforms.RandomRotation(degrees=(45)), + transforms.ColorJitter(brightness=0.3, contrast=0.3, saturation=0.3, hue=0.1), + transforms.RandomResizedCrop(image_size, scale=(0.8, 1.0)), + transforms.ToTensor(), + transforms.Normalize(mean=[0.485, 0.456, 0.406], + std=[0.229, 0.224, 0.225]), + ]) + + # Augmentasi validasi lebih ringan + valid_transform = transforms.Compose([ + transforms.Resize((image_size, image_size)), + transforms.ToTensor(), + transforms.Normalize(mean=[0.485, 0.456, 0.406], + std=[0.229, 0.224, 0.225]), + ]) + + # Load dataset + train_dataset = datasets.ImageFolder(os.path.join(data_dir, "train"), transform=train_transform) + valid_dataset = datasets.ImageFolder(os.path.join(data_dir, "valid"), transform=valid_transform) + + # Hitung distribusi class untuk WeightedRandomSampler + class_counts = Counter([label for _, label in train_dataset.samples]) + class_weights = {cls: 1.0 / count for cls, count in class_counts.items()} + sample_weights = [class_weights[label] for _, label in train_dataset.samples] + + sampler = WeightedRandomSampler(weights=sample_weights, num_samples=len(sample_weights), replacement=True) + + # DataLoader + train_loader = DataLoader(train_dataset, batch_size=batch_size, sampler=sampler, + num_workers=num_workers, pin_memory=True) + valid_loader = DataLoader(valid_dataset, batch_size=batch_size, shuffle=False, + num_workers=num_workers, pin_memory=True) + + return train_loader, valid_loader, train_dataset.classes, train_dataset diff --git a/.history/src/generate_csv_20250923125440.py b/.history/src/generate_csv_20250923125440.py new file mode 100644 index 
0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/.history/src/generate_csv_20250923125447.py b/.history/src/generate_csv_20250923125447.py new file mode 100644 index 0000000000000000000000000000000000000000..f4cd6ed8287b1e552368afa73c79c978279f7c0d --- /dev/null +++ b/.history/src/generate_csv_20250923125447.py @@ -0,0 +1,47 @@ +# generate_csv.py + +import os +import pandas as pd + +# --- KONFIGURASI --- +# Path ke dataset asli Anda yang berisi folder 01, 02, 03, ... +SOURCE_DATA_DIR = 'Nuclear Cataract Dataset' +# Nama file CSV yang akan dibuat +OUTPUT_CSV_PATH = 'labels.csv' + +# --- SCRIPT UTAMA --- +image_paths = [] + +print(f"Memindai folder '{SOURCE_DATA_DIR}'...") + +# Berjalan melalui setiap folder pasien (01, 02, ...) +for patient_id in sorted(os.listdir(SOURCE_DATA_DIR)): + patient_path = os.path.join(SOURCE_DATA_DIR, patient_id) + + if os.path.isdir(patient_path): + # Berjalan melalui setiap subfolder mata (DER, IZQ) + for eye_folder in os.listdir(patient_path): + eye_path = os.path.join(patient_path, eye_folder) + + if os.path.isdir(eye_path): + # Cari semua file gambar di dalam folder mata + for filename in os.listdir(eye_path): + if filename.lower().endswith(('.png', '.jpg', '.jpeg')): + # Buat path relatif yang benar, misal: '01/DER/IM000001.png' + relative_path = os.path.join(patient_id, eye_folder, filename) + image_paths.append(relative_path) + +print(f"Total {len(image_paths)} gambar ditemukan.") + +# Buat DataFrame pandas +df = pd.DataFrame({'image': image_paths}) + +# Tambahkan kolom 'grade' dengan nilai placeholder +df['grade'] = '_GANTI_SAYA_' + +# Simpan ke file CSV +df.to_csv(OUTPUT_CSV_PATH, index=False) + +print(f"\n--- Selesai ---") +print(f"File '{OUTPUT_CSV_PATH}' berhasil dibuat.") +print("Silakan buka file tersebut dan isi kolom 'grade' dengan label yang benar.") \ No newline at end of file diff --git a/.history/src/generate_csv_20250923125452.py 
b/.history/src/generate_csv_20250923125452.py new file mode 100644 index 0000000000000000000000000000000000000000..f4cd6ed8287b1e552368afa73c79c978279f7c0d --- /dev/null +++ b/.history/src/generate_csv_20250923125452.py @@ -0,0 +1,47 @@ +# generate_csv.py + +import os +import pandas as pd + +# --- KONFIGURASI --- +# Path ke dataset asli Anda yang berisi folder 01, 02, 03, ... +SOURCE_DATA_DIR = 'Nuclear Cataract Dataset' +# Nama file CSV yang akan dibuat +OUTPUT_CSV_PATH = 'labels.csv' + +# --- SCRIPT UTAMA --- +image_paths = [] + +print(f"Memindai folder '{SOURCE_DATA_DIR}'...") + +# Berjalan melalui setiap folder pasien (01, 02, ...) +for patient_id in sorted(os.listdir(SOURCE_DATA_DIR)): + patient_path = os.path.join(SOURCE_DATA_DIR, patient_id) + + if os.path.isdir(patient_path): + # Berjalan melalui setiap subfolder mata (DER, IZQ) + for eye_folder in os.listdir(patient_path): + eye_path = os.path.join(patient_path, eye_folder) + + if os.path.isdir(eye_path): + # Cari semua file gambar di dalam folder mata + for filename in os.listdir(eye_path): + if filename.lower().endswith(('.png', '.jpg', '.jpeg')): + # Buat path relatif yang benar, misal: '01/DER/IM000001.png' + relative_path = os.path.join(patient_id, eye_folder, filename) + image_paths.append(relative_path) + +print(f"Total {len(image_paths)} gambar ditemukan.") + +# Buat DataFrame pandas +df = pd.DataFrame({'image': image_paths}) + +# Tambahkan kolom 'grade' dengan nilai placeholder +df['grade'] = '_GANTI_SAYA_' + +# Simpan ke file CSV +df.to_csv(OUTPUT_CSV_PATH, index=False) + +print(f"\n--- Selesai ---") +print(f"File '{OUTPUT_CSV_PATH}' berhasil dibuat.") +print("Silakan buka file tersebut dan isi kolom 'grade' dengan label yang benar.") \ No newline at end of file diff --git a/.history/src/generate_csv_20250923125520.py b/.history/src/generate_csv_20250923125520.py new file mode 100644 index 0000000000000000000000000000000000000000..69a463016edb9ac6c590c13a45f5273719debd31 --- /dev/null +++ 
b/.history/src/generate_csv_20250923125520.py @@ -0,0 +1,47 @@ +# generate_csv.py + +import os +import pandas as pd + +# --- KONFIGURASI --- +# Path ke dataset asli Anda yang berisi folder 01, 02, 03, ... +SOURCE_DATA_DIR = 'Nuclear Cataract Dataset' +# Nama file CSV yang akan dibuat +OUTPUT_CSV_PATH = 'labels.csv' + +# --- SCRIPT UTAMA --- +image_paths = [] + +print(f"Memindai folder '{SOURCE_DATA_DIR}'...") + +# Berjalan melalui setiap folder pasien (01, 02, ...) +for patient_id in sorted(os.listdir(SOURCE_DATA_DIR)): + patient_path = os.path.join(SOURCE_DATA_DIR, patient_id) + + if os.path.isdir(patient_path): + # Berjalan melalui setiap subfolder mata (DER, IZQ) + for eye_folder in os.listdir(patient_path): + eye_path = os.path.join(patient_path, eye_folder) + + if os.path.isdir(eye_path): + # Cari semua file gambar di dalam folder mata + for filename in os.listdir(eye_path): + if filename.lower().endswith(('.png', '.jpg', '.jpeg')): + # Buat path relatif yang benar, misal: '01/DER/IM000001.png' + relative_path = os.path.join(patient_id, eye_folder, filename) + image_paths.append(relative_path) + +print(f"Total {len(image_paths)} gambar ditemukan.") + +# Buat DataFrame pandas +df = pd.DataFrame({'image': image_paths}) + +# Tambahkan kolom 'grade' dengan nilai placeholder +df['grade'] = '_GANTI_SAYA_' + +# Simpan ke file CSV +df.to_csv(OUTPUT_CSV_PATH, index=False) + +print(f"\n--- Selesai ---") +print(f"File '{OUTPUT_CSV_PATH}' berhasil dibuat.") +print("Silakan buka file tersebut dan isi kolom 'grade' dengan label yang benar.")de \ No newline at end of file diff --git a/.history/src/generate_csv_20250923125522.py b/.history/src/generate_csv_20250923125522.py new file mode 100644 index 0000000000000000000000000000000000000000..f4cd6ed8287b1e552368afa73c79c978279f7c0d --- /dev/null +++ b/.history/src/generate_csv_20250923125522.py @@ -0,0 +1,47 @@ +# generate_csv.py + +import os +import pandas as pd + +# --- KONFIGURASI --- +# Path ke dataset asli Anda yang 
berisi folder 01, 02, 03, ... +SOURCE_DATA_DIR = 'Nuclear Cataract Dataset' +# Nama file CSV yang akan dibuat +OUTPUT_CSV_PATH = 'labels.csv' + +# --- SCRIPT UTAMA --- +image_paths = [] + +print(f"Memindai folder '{SOURCE_DATA_DIR}'...") + +# Berjalan melalui setiap folder pasien (01, 02, ...) +for patient_id in sorted(os.listdir(SOURCE_DATA_DIR)): + patient_path = os.path.join(SOURCE_DATA_DIR, patient_id) + + if os.path.isdir(patient_path): + # Berjalan melalui setiap subfolder mata (DER, IZQ) + for eye_folder in os.listdir(patient_path): + eye_path = os.path.join(patient_path, eye_folder) + + if os.path.isdir(eye_path): + # Cari semua file gambar di dalam folder mata + for filename in os.listdir(eye_path): + if filename.lower().endswith(('.png', '.jpg', '.jpeg')): + # Buat path relatif yang benar, misal: '01/DER/IM000001.png' + relative_path = os.path.join(patient_id, eye_folder, filename) + image_paths.append(relative_path) + +print(f"Total {len(image_paths)} gambar ditemukan.") + +# Buat DataFrame pandas +df = pd.DataFrame({'image': image_paths}) + +# Tambahkan kolom 'grade' dengan nilai placeholder +df['grade'] = '_GANTI_SAYA_' + +# Simpan ke file CSV +df.to_csv(OUTPUT_CSV_PATH, index=False) + +print(f"\n--- Selesai ---") +print(f"File '{OUTPUT_CSV_PATH}' berhasil dibuat.") +print("Silakan buka file tersebut dan isi kolom 'grade' dengan label yang benar.") \ No newline at end of file diff --git a/.history/src/generate_csv_20250923131917.py b/.history/src/generate_csv_20250923131917.py new file mode 100644 index 0000000000000000000000000000000000000000..f53711f29c22308b9b88541dc4e26eef7d028468 --- /dev/null +++ b/.history/src/generate_csv_20250923131917.py @@ -0,0 +1,47 @@ +# generate_csv.py + +import os +import pandas as pd + +# --- KONFIGURASI --- +# Path ke dataset asli Anda yang berisi folder 01, 02, 03, ... 
+SOURCE_DATA_DIR = '..Nuclear Cataract Dataset' +# Nama file CSV yang akan dibuat +OUTPUT_CSV_PATH = 'labels.csv' + +# --- SCRIPT UTAMA --- +image_paths = [] + +print(f"Memindai folder '{SOURCE_DATA_DIR}'...") + +# Berjalan melalui setiap folder pasien (01, 02, ...) +for patient_id in sorted(os.listdir(SOURCE_DATA_DIR)): + patient_path = os.path.join(SOURCE_DATA_DIR, patient_id) + + if os.path.isdir(patient_path): + # Berjalan melalui setiap subfolder mata (DER, IZQ) + for eye_folder in os.listdir(patient_path): + eye_path = os.path.join(patient_path, eye_folder) + + if os.path.isdir(eye_path): + # Cari semua file gambar di dalam folder mata + for filename in os.listdir(eye_path): + if filename.lower().endswith(('.png', '.jpg', '.jpeg')): + # Buat path relatif yang benar, misal: '01/DER/IM000001.png' + relative_path = os.path.join(patient_id, eye_folder, filename) + image_paths.append(relative_path) + +print(f"Total {len(image_paths)} gambar ditemukan.") + +# Buat DataFrame pandas +df = pd.DataFrame({'image': image_paths}) + +# Tambahkan kolom 'grade' dengan nilai placeholder +df['grade'] = '_GANTI_SAYA_' + +# Simpan ke file CSV +df.to_csv(OUTPUT_CSV_PATH, index=False) + +print(f"\n--- Selesai ---") +print(f"File '{OUTPUT_CSV_PATH}' berhasil dibuat.") +print("Silakan buka file tersebut dan isi kolom 'grade' dengan label yang benar.") \ No newline at end of file diff --git a/.history/src/generate_csv_20250923131918.py b/.history/src/generate_csv_20250923131918.py new file mode 100644 index 0000000000000000000000000000000000000000..8c50b5a479c63b667ffcc52ac38c9339c58dbc89 --- /dev/null +++ b/.history/src/generate_csv_20250923131918.py @@ -0,0 +1,47 @@ +# generate_csv.py + +import os +import pandas as pd + +# --- KONFIGURASI --- +# Path ke dataset asli Anda yang berisi folder 01, 02, 03, ... 
+SOURCE_DATA_DIR = '../Nuclear Cataract Dataset' +# Nama file CSV yang akan dibuat +OUTPUT_CSV_PATH = 'labels.csv' + +# --- SCRIPT UTAMA --- +image_paths = [] + +print(f"Memindai folder '{SOURCE_DATA_DIR}'...") + +# Berjalan melalui setiap folder pasien (01, 02, ...) +for patient_id in sorted(os.listdir(SOURCE_DATA_DIR)): + patient_path = os.path.join(SOURCE_DATA_DIR, patient_id) + + if os.path.isdir(patient_path): + # Berjalan melalui setiap subfolder mata (DER, IZQ) + for eye_folder in os.listdir(patient_path): + eye_path = os.path.join(patient_path, eye_folder) + + if os.path.isdir(eye_path): + # Cari semua file gambar di dalam folder mata + for filename in os.listdir(eye_path): + if filename.lower().endswith(('.png', '.jpg', '.jpeg')): + # Buat path relatif yang benar, misal: '01/DER/IM000001.png' + relative_path = os.path.join(patient_id, eye_folder, filename) + image_paths.append(relative_path) + +print(f"Total {len(image_paths)} gambar ditemukan.") + +# Buat DataFrame pandas +df = pd.DataFrame({'image': image_paths}) + +# Tambahkan kolom 'grade' dengan nilai placeholder +df['grade'] = '_GANTI_SAYA_' + +# Simpan ke file CSV +df.to_csv(OUTPUT_CSV_PATH, index=False) + +print(f"\n--- Selesai ---") +print(f"File '{OUTPUT_CSV_PATH}' berhasil dibuat.") +print("Silakan buka file tersebut dan isi kolom 'grade' dengan label yang benar.") \ No newline at end of file diff --git a/.history/src/generate_csv_20250923131921.py b/.history/src/generate_csv_20250923131921.py new file mode 100644 index 0000000000000000000000000000000000000000..9207ee84173722a27db70808c335c29fc5180f14 --- /dev/null +++ b/.history/src/generate_csv_20250923131921.py @@ -0,0 +1,47 @@ +# generate_csv.py + +import os +import pandas as pd + +# --- KONFIGURASI --- +# Path ke dataset asli Anda yang berisi folder 01, 02, 03, ... 
+SOURCE_DATA_DIR = '/Nuclear Cataract Dataset' +# Nama file CSV yang akan dibuat +OUTPUT_CSV_PATH = 'labels.csv' + +# --- SCRIPT UTAMA --- +image_paths = [] + +print(f"Memindai folder '{SOURCE_DATA_DIR}'...") + +# Berjalan melalui setiap folder pasien (01, 02, ...) +for patient_id in sorted(os.listdir(SOURCE_DATA_DIR)): + patient_path = os.path.join(SOURCE_DATA_DIR, patient_id) + + if os.path.isdir(patient_path): + # Berjalan melalui setiap subfolder mata (DER, IZQ) + for eye_folder in os.listdir(patient_path): + eye_path = os.path.join(patient_path, eye_folder) + + if os.path.isdir(eye_path): + # Cari semua file gambar di dalam folder mata + for filename in os.listdir(eye_path): + if filename.lower().endswith(('.png', '.jpg', '.jpeg')): + # Buat path relatif yang benar, misal: '01/DER/IM000001.png' + relative_path = os.path.join(patient_id, eye_folder, filename) + image_paths.append(relative_path) + +print(f"Total {len(image_paths)} gambar ditemukan.") + +# Buat DataFrame pandas +df = pd.DataFrame({'image': image_paths}) + +# Tambahkan kolom 'grade' dengan nilai placeholder +df['grade'] = '_GANTI_SAYA_' + +# Simpan ke file CSV +df.to_csv(OUTPUT_CSV_PATH, index=False) + +print(f"\n--- Selesai ---") +print(f"File '{OUTPUT_CSV_PATH}' berhasil dibuat.") +print("Silakan buka file tersebut dan isi kolom 'grade' dengan label yang benar.") \ No newline at end of file diff --git a/.history/src/generate_csv_20250923131932.py b/.history/src/generate_csv_20250923131932.py new file mode 100644 index 0000000000000000000000000000000000000000..93c466923dd11c7c0f2aa2e4c6a0cd0fad96d4b2 --- /dev/null +++ b/.history/src/generate_csv_20250923131932.py @@ -0,0 +1,47 @@ +# generate_csv.py + +import os +import pandas as pd + +# --- KONFIGURASI --- +# Path ke dataset asli Anda yang berisi folder 01, 02, 03, ... 
+SOURCE_DATA_DIR = 'C:\Users\user\Documents\Project\Cataract-ViT\data\Nuclear Cataract Database for Biomedical and Machine Learning Applications' +# Nama file CSV yang akan dibuat +OUTPUT_CSV_PATH = 'labels.csv' + +# --- SCRIPT UTAMA --- +image_paths = [] + +print(f"Memindai folder '{SOURCE_DATA_DIR}'...") + +# Berjalan melalui setiap folder pasien (01, 02, ...) +for patient_id in sorted(os.listdir(SOURCE_DATA_DIR)): + patient_path = os.path.join(SOURCE_DATA_DIR, patient_id) + + if os.path.isdir(patient_path): + # Berjalan melalui setiap subfolder mata (DER, IZQ) + for eye_folder in os.listdir(patient_path): + eye_path = os.path.join(patient_path, eye_folder) + + if os.path.isdir(eye_path): + # Cari semua file gambar di dalam folder mata + for filename in os.listdir(eye_path): + if filename.lower().endswith(('.png', '.jpg', '.jpeg')): + # Buat path relatif yang benar, misal: '01/DER/IM000001.png' + relative_path = os.path.join(patient_id, eye_folder, filename) + image_paths.append(relative_path) + +print(f"Total {len(image_paths)} gambar ditemukan.") + +# Buat DataFrame pandas +df = pd.DataFrame({'image': image_paths}) + +# Tambahkan kolom 'grade' dengan nilai placeholder +df['grade'] = '_GANTI_SAYA_' + +# Simpan ke file CSV +df.to_csv(OUTPUT_CSV_PATH, index=False) + +print(f"\n--- Selesai ---") +print(f"File '{OUTPUT_CSV_PATH}' berhasil dibuat.") +print("Silakan buka file tersebut dan isi kolom 'grade' dengan label yang benar.") \ No newline at end of file diff --git a/.history/src/generate_csv_20250923131938.py b/.history/src/generate_csv_20250923131938.py new file mode 100644 index 0000000000000000000000000000000000000000..b1ae77e54042687eee905c8df2734221f7ebd7b4 --- /dev/null +++ b/.history/src/generate_csv_20250923131938.py @@ -0,0 +1,47 @@ +# generate_csv.py + +import os +import pandas as pd + +# --- KONFIGURASI --- +# Path ke dataset asli Anda yang berisi folder 01, 02, 03, ... 
+SOURCE_DATA_DIR = 'C:\Users\user\Documents\ProjectCataract-ViT\data\Nuclear Cataract Database for Biomedical and Machine Learning Applications' +# Nama file CSV yang akan dibuat +OUTPUT_CSV_PATH = 'labels.csv' + +# --- SCRIPT UTAMA --- +image_paths = [] + +print(f"Memindai folder '{SOURCE_DATA_DIR}'...") + +# Berjalan melalui setiap folder pasien (01, 02, ...) +for patient_id in sorted(os.listdir(SOURCE_DATA_DIR)): + patient_path = os.path.join(SOURCE_DATA_DIR, patient_id) + + if os.path.isdir(patient_path): + # Berjalan melalui setiap subfolder mata (DER, IZQ) + for eye_folder in os.listdir(patient_path): + eye_path = os.path.join(patient_path, eye_folder) + + if os.path.isdir(eye_path): + # Cari semua file gambar di dalam folder mata + for filename in os.listdir(eye_path): + if filename.lower().endswith(('.png', '.jpg', '.jpeg')): + # Buat path relatif yang benar, misal: '01/DER/IM000001.png' + relative_path = os.path.join(patient_id, eye_folder, filename) + image_paths.append(relative_path) + +print(f"Total {len(image_paths)} gambar ditemukan.") + +# Buat DataFrame pandas +df = pd.DataFrame({'image': image_paths}) + +# Tambahkan kolom 'grade' dengan nilai placeholder +df['grade'] = '_GANTI_SAYA_' + +# Simpan ke file CSV +df.to_csv(OUTPUT_CSV_PATH, index=False) + +print(f"\n--- Selesai ---") +print(f"File '{OUTPUT_CSV_PATH}' berhasil dibuat.") +print("Silakan buka file tersebut dan isi kolom 'grade' dengan label yang benar.") \ No newline at end of file diff --git a/.history/src/generate_csv_20250923131944.py b/.history/src/generate_csv_20250923131944.py new file mode 100644 index 0000000000000000000000000000000000000000..22e11443d77747f1ae5ef7605b6aac9614055f57 --- /dev/null +++ b/.history/src/generate_csv_20250923131944.py @@ -0,0 +1,47 @@ +# generate_csv.py + +import os +import pandas as pd + +# --- KONFIGURASI --- +# Path ke dataset asli Anda yang berisi folder 01, 02, 03, ... 
+SOURCE_DATA_DIR = 'data\Nuclear Cataract Database for Biomedical and Machine Learning Applications' +# Nama file CSV yang akan dibuat +OUTPUT_CSV_PATH = 'labels.csv' + +# --- SCRIPT UTAMA --- +image_paths = [] + +print(f"Memindai folder '{SOURCE_DATA_DIR}'...") + +# Berjalan melalui setiap folder pasien (01, 02, ...) +for patient_id in sorted(os.listdir(SOURCE_DATA_DIR)): + patient_path = os.path.join(SOURCE_DATA_DIR, patient_id) + + if os.path.isdir(patient_path): + # Berjalan melalui setiap subfolder mata (DER, IZQ) + for eye_folder in os.listdir(patient_path): + eye_path = os.path.join(patient_path, eye_folder) + + if os.path.isdir(eye_path): + # Cari semua file gambar di dalam folder mata + for filename in os.listdir(eye_path): + if filename.lower().endswith(('.png', '.jpg', '.jpeg')): + # Buat path relatif yang benar, misal: '01/DER/IM000001.png' + relative_path = os.path.join(patient_id, eye_folder, filename) + image_paths.append(relative_path) + +print(f"Total {len(image_paths)} gambar ditemukan.") + +# Buat DataFrame pandas +df = pd.DataFrame({'image': image_paths}) + +# Tambahkan kolom 'grade' dengan nilai placeholder +df['grade'] = '_GANTI_SAYA_' + +# Simpan ke file CSV +df.to_csv(OUTPUT_CSV_PATH, index=False) + +print(f"\n--- Selesai ---") +print(f"File '{OUTPUT_CSV_PATH}' berhasil dibuat.") +print("Silakan buka file tersebut dan isi kolom 'grade' dengan label yang benar.") \ No newline at end of file diff --git a/.history/src/generate_csv_20250923131948.py b/.history/src/generate_csv_20250923131948.py new file mode 100644 index 0000000000000000000000000000000000000000..a125a323951f08a02dfb20b84e8c38f00be212f6 --- /dev/null +++ b/.history/src/generate_csv_20250923131948.py @@ -0,0 +1,47 @@ +# generate_csv.py + +import os +import pandas as pd + +# --- KONFIGURASI --- +# Path ke dataset asli Anda yang berisi folder 01, 02, 03, ... 
+SOURCE_DATA_DIR = 'data/Nuclear Cataract Database for Biomedical and Machine Learning Applications' +# Nama file CSV yang akan dibuat +OUTPUT_CSV_PATH = 'labels.csv' + +# --- SCRIPT UTAMA --- +image_paths = [] + +print(f"Memindai folder '{SOURCE_DATA_DIR}'...") + +# Berjalan melalui setiap folder pasien (01, 02, ...) +for patient_id in sorted(os.listdir(SOURCE_DATA_DIR)): + patient_path = os.path.join(SOURCE_DATA_DIR, patient_id) + + if os.path.isdir(patient_path): + # Berjalan melalui setiap subfolder mata (DER, IZQ) + for eye_folder in os.listdir(patient_path): + eye_path = os.path.join(patient_path, eye_folder) + + if os.path.isdir(eye_path): + # Cari semua file gambar di dalam folder mata + for filename in os.listdir(eye_path): + if filename.lower().endswith(('.png', '.jpg', '.jpeg')): + # Buat path relatif yang benar, misal: '01/DER/IM000001.png' + relative_path = os.path.join(patient_id, eye_folder, filename) + image_paths.append(relative_path) + +print(f"Total {len(image_paths)} gambar ditemukan.") + +# Buat DataFrame pandas +df = pd.DataFrame({'image': image_paths}) + +# Tambahkan kolom 'grade' dengan nilai placeholder +df['grade'] = '_GANTI_SAYA_' + +# Simpan ke file CSV +df.to_csv(OUTPUT_CSV_PATH, index=False) + +print(f"\n--- Selesai ---") +print(f"File '{OUTPUT_CSV_PATH}' berhasil dibuat.") +print("Silakan buka file tersebut dan isi kolom 'grade' dengan label yang benar.") \ No newline at end of file diff --git a/.history/src/generate_csv_20250923132047.py b/.history/src/generate_csv_20250923132047.py new file mode 100644 index 0000000000000000000000000000000000000000..6cd7a8817be757f138392aaa357bfdb95de33be5 --- /dev/null +++ b/.history/src/generate_csv_20250923132047.py @@ -0,0 +1,47 @@ +# generate_csv.py + +import os +import pandas as pd + +# --- KONFIGURASI --- +# Path ke dataset asli Anda yang berisi folder 01, 02, 03, ... 
+SOURCE_DATA_DIR = 'data/Nuclear Cataract Database for Biomedical and Machine Learning Applications' +# Nama file CSV yang akan dibuat +OUTPUT_CSV_PATH = 'outputs/labels.csv' + +# --- SCRIPT UTAMA --- +image_paths = [] + +print(f"Memindai folder '{SOURCE_DATA_DIR}'...") + +# Berjalan melalui setiap folder pasien (01, 02, ...) +for patient_id in sorted(os.listdir(SOURCE_DATA_DIR)): + patient_path = os.path.join(SOURCE_DATA_DIR, patient_id) + + if os.path.isdir(patient_path): + # Berjalan melalui setiap subfolder mata (DER, IZQ) + for eye_folder in os.listdir(patient_path): + eye_path = os.path.join(patient_path, eye_folder) + + if os.path.isdir(eye_path): + # Cari semua file gambar di dalam folder mata + for filename in os.listdir(eye_path): + if filename.lower().endswith(('.png', '.jpg', '.jpeg')): + # Buat path relatif yang benar, misal: '01/DER/IM000001.png' + relative_path = os.path.join(patient_id, eye_folder, filename) + image_paths.append(relative_path) + +print(f"Total {len(image_paths)} gambar ditemukan.") + +# Buat DataFrame pandas +df = pd.DataFrame({'image': image_paths}) + +# Tambahkan kolom 'grade' dengan nilai placeholder +df['grade'] = '_GANTI_SAYA_' + +# Simpan ke file CSV +df.to_csv(OUTPUT_CSV_PATH, index=False) + +print(f"\n--- Selesai ---") +print(f"File '{OUTPUT_CSV_PATH}' berhasil dibuat.") +print("Silakan buka file tersebut dan isi kolom 'grade' dengan label yang benar.") \ No newline at end of file diff --git a/.history/src/generate_csv_20250923132106.py b/.history/src/generate_csv_20250923132106.py new file mode 100644 index 0000000000000000000000000000000000000000..0452072d5fcf62103b9a36d4269637375d8c65bf --- /dev/null +++ b/.history/src/generate_csv_20250923132106.py @@ -0,0 +1,47 @@ +# generate_csv.py + +import os +import pandas as pd + +# --- KONFIGURASI --- +# Path ke dataset asli Anda yang berisi folder 01, 02, 03, ... 
+SOURCE_DATA_DIR = 'data/Nuclear Cataract Database for Biomedical and Machine Learning Applications' +# Nama file CSV yang akan dibuat +OUTPUT_CSV_PATH = 'outputs/csv/labels.csv' + +# --- SCRIPT UTAMA --- +image_paths = [] + +print(f"Memindai folder '{SOURCE_DATA_DIR}'...") + +# Berjalan melalui setiap folder pasien (01, 02, ...) +for patient_id in sorted(os.listdir(SOURCE_DATA_DIR)): + patient_path = os.path.join(SOURCE_DATA_DIR, patient_id) + + if os.path.isdir(patient_path): + # Berjalan melalui setiap subfolder mata (DER, IZQ) + for eye_folder in os.listdir(patient_path): + eye_path = os.path.join(patient_path, eye_folder) + + if os.path.isdir(eye_path): + # Cari semua file gambar di dalam folder mata + for filename in os.listdir(eye_path): + if filename.lower().endswith(('.png', '.jpg', '.jpeg')): + # Buat path relatif yang benar, misal: '01/DER/IM000001.png' + relative_path = os.path.join(patient_id, eye_folder, filename) + image_paths.append(relative_path) + +print(f"Total {len(image_paths)} gambar ditemukan.") + +# Buat DataFrame pandas +df = pd.DataFrame({'image': image_paths}) + +# Tambahkan kolom 'grade' dengan nilai placeholder +df['grade'] = '_GANTI_SAYA_' + +# Simpan ke file CSV +df.to_csv(OUTPUT_CSV_PATH, index=False) + +print(f"\n--- Selesai ---") +print(f"File '{OUTPUT_CSV_PATH}' berhasil dibuat.") +print("Silakan buka file tersebut dan isi kolom 'grade' dengan label yang benar.") \ No newline at end of file diff --git a/.history/src/generate_csv_20250923132255.py b/.history/src/generate_csv_20250923132255.py new file mode 100644 index 0000000000000000000000000000000000000000..24a02fb13a0e6bb10cae50ec42a18f611796d2a9 --- /dev/null +++ b/.history/src/generate_csv_20250923132255.py @@ -0,0 +1,47 @@ +# generate_csv.py + +import os +import pandas as pd + +# --- KONFIGURASI --- +# Path ke dataset asli Anda yang berisi folder 01, 02, 03, ... 
+SOURCE_DATA_DIR = 'data/Nuclear Cataract Database for Biomedical and Machine Learning Applications' +# Nama file CSV yang akan dibuat +OUTPUT_CSV_PATH = 'outputs/csv/labels.csv' + +# --- SCRIPT UTAMA --- +image_paths = [] + +print(f"Memindai folder '{SOURCE_DATA_DIR}'...") + +# Berjalan melalui setiap folder pasien (01, 02, ...) +for patient_id in sorted(os.listdir(SOURCE_DATA_DIR)): + patient_path = os.path.join(SOURCE_DATA_DIR, patient_id) + + if os.path.isdir(patient_path): + # Berjalan melalui setiap subfolder mata (DER, IZQ) + for eye_folder in os.listdir(patient_path): + eye_path = os.path.join(patient_path, eye_folder) + + if os.path.isdir(eye_path): + # Cari semua file gambar di dalam folder mata + for filename in os.listdir(eye_path): + if filename.lower().endswith(('.png', '.jpg', '.jpeg')): + # Buat path relatif yang benar, misal: '01/DER/IM000001.png' + relative_path = os.path.join(patient_id, eye_folder, filename) + image_paths.append(relative_path) + +print(f"Total {len(image_paths)} gambar ditemukan.") + +# Buat DataFrame pandas +df = pd.DataFrame({'image': image_paths}) + +# Tambahkan kolom 'grade' dengan nilai placeholder +df['grade'] = '_LABEL_SAYA_' + +# Simpan ke file CSV +df.to_csv(OUTPUT_CSV_PATH, index=False) + +print(f"\n--- Selesai ---") +print(f"File '{OUTPUT_CSV_PATH}' berhasil dibuat.") +print("Silakan buka file tersebut dan isi kolom 'grade' dengan label yang benar.") \ No newline at end of file diff --git a/.history/src/generate_csv_20250923132259.py b/.history/src/generate_csv_20250923132259.py new file mode 100644 index 0000000000000000000000000000000000000000..f05fe866c5272c7f82f12830af8470ca8c72b5a9 --- /dev/null +++ b/.history/src/generate_csv_20250923132259.py @@ -0,0 +1,47 @@ +# generate_csv.py + +import os +import pandas as pd + +# --- KONFIGURASI --- +# Path ke dataset asli Anda yang berisi folder 01, 02, 03, ... 
+SOURCE_DATA_DIR = 'data/Nuclear Cataract Database for Biomedical and Machine Learning Applications' +# Nama file CSV yang akan dibuat +OUTPUT_CSV_PATH = 'outputs/csv/labels.csv' + +# --- SCRIPT UTAMA --- +image_paths = [] + +print(f"Memindai folder '{SOURCE_DATA_DIR}'...") + +# Berjalan melalui setiap folder pasien (01, 02, ...) +for patient_id in sorted(os.listdir(SOURCE_DATA_DIR)): + patient_path = os.path.join(SOURCE_DATA_DIR, patient_id) + + if os.path.isdir(patient_path): + # Berjalan melalui setiap subfolder mata (DER, IZQ) + for eye_folder in os.listdir(patient_path): + eye_path = os.path.join(patient_path, eye_folder) + + if os.path.isdir(eye_path): + # Cari semua file gambar di dalam folder mata + for filename in os.listdir(eye_path): + if filename.lower().endswith(('.png', '.jpg', '.jpeg')): + # Buat path relatif yang benar, misal: '01/DER/IM000001.png' + relative_path = os.path.join(patient_id, eye_folder, filename) + image_paths.append(relative_path) + +print(f"Total {len(image_paths)} gambar ditemukan.") + +# Buat DataFrame pandas +df = pd.DataFrame({'image': image_paths}) + +# Tambahkan kolom 'grade' dengan nilai placeholder +df['grade'] = '_LABEL_KOSONG_' + +# Simpan ke file CSV +df.to_csv(OUTPUT_CSV_PATH, index=False) + +print(f"\n--- Selesai ---") +print(f"File '{OUTPUT_CSV_PATH}' berhasil dibuat.") +print("Silakan buka file tersebut dan isi kolom 'grade' dengan label yang benar.") \ No newline at end of file diff --git a/.history/src/generate_csv_20250923132410.py b/.history/src/generate_csv_20250923132410.py new file mode 100644 index 0000000000000000000000000000000000000000..0b141ae268325edbd3456a19d7422c57685c25f1 --- /dev/null +++ b/.history/src/generate_csv_20250923132410.py @@ -0,0 +1,47 @@ +# generate_csv.py + +import os +import pandas as pd + +# --- KONFIGURASI --- +# Path ke dataset asli Anda yang berisi folder 01, 02, 03, ... 
+SOURCE_DATA_DIR = '' +# Nama file CSV yang akan dibuat +OUTPUT_CSV_PATH = 'outputs/csv/labels.csv' + +# --- SCRIPT UTAMA --- +image_paths = [] + +print(f"Memindai folder '{SOURCE_DATA_DIR}'...") + +# Berjalan melalui setiap folder pasien (01, 02, ...) +for patient_id in sorted(os.listdir(SOURCE_DATA_DIR)): + patient_path = os.path.join(SOURCE_DATA_DIR, patient_id) + + if os.path.isdir(patient_path): + # Berjalan melalui setiap subfolder mata (DER, IZQ) + for eye_folder in os.listdir(patient_path): + eye_path = os.path.join(patient_path, eye_folder) + + if os.path.isdir(eye_path): + # Cari semua file gambar di dalam folder mata + for filename in os.listdir(eye_path): + if filename.lower().endswith(('.png', '.jpg', '.jpeg')): + # Buat path relatif yang benar, misal: '01/DER/IM000001.png' + relative_path = os.path.join(patient_id, eye_folder, filename) + image_paths.append(relative_path) + +print(f"Total {len(image_paths)} gambar ditemukan.") + +# Buat DataFrame pandas +df = pd.DataFrame({'image': image_paths}) + +# Tambahkan kolom 'grade' dengan nilai placeholder +df['grade'] = '_LABEL_KOSONG_' + +# Simpan ke file CSV +df.to_csv(OUTPUT_CSV_PATH, index=False) + +print(f"\n--- Selesai ---") +print(f"File '{OUTPUT_CSV_PATH}' berhasil dibuat.") +print("Silakan buka file tersebut dan isi kolom 'grade' dengan label yang benar.") \ No newline at end of file diff --git a/.history/src/generate_csv_20250923132411.py b/.history/src/generate_csv_20250923132411.py new file mode 100644 index 0000000000000000000000000000000000000000..199512dc890fdc264d529bbc67690bc486f112e3 --- /dev/null +++ b/.history/src/generate_csv_20250923132411.py @@ -0,0 +1,47 @@ +# generate_csv.py + +import os +import pandas as pd + +# --- KONFIGURASI --- +# Path ke dataset asli Anda yang berisi folder 01, 02, 03, ... 
+SOURCE_DATA_DIR = 'data\Nuclear Cataract Database for Biomedical and Machine Learning Applications\Nuclear Cataract Dataset' +# Nama file CSV yang akan dibuat +OUTPUT_CSV_PATH = 'outputs/csv/labels.csv' + +# --- SCRIPT UTAMA --- +image_paths = [] + +print(f"Memindai folder '{SOURCE_DATA_DIR}'...") + +# Berjalan melalui setiap folder pasien (01, 02, ...) +for patient_id in sorted(os.listdir(SOURCE_DATA_DIR)): + patient_path = os.path.join(SOURCE_DATA_DIR, patient_id) + + if os.path.isdir(patient_path): + # Berjalan melalui setiap subfolder mata (DER, IZQ) + for eye_folder in os.listdir(patient_path): + eye_path = os.path.join(patient_path, eye_folder) + + if os.path.isdir(eye_path): + # Cari semua file gambar di dalam folder mata + for filename in os.listdir(eye_path): + if filename.lower().endswith(('.png', '.jpg', '.jpeg')): + # Buat path relatif yang benar, misal: '01/DER/IM000001.png' + relative_path = os.path.join(patient_id, eye_folder, filename) + image_paths.append(relative_path) + +print(f"Total {len(image_paths)} gambar ditemukan.") + +# Buat DataFrame pandas +df = pd.DataFrame({'image': image_paths}) + +# Tambahkan kolom 'grade' dengan nilai placeholder +df['grade'] = '_LABEL_KOSONG_' + +# Simpan ke file CSV +df.to_csv(OUTPUT_CSV_PATH, index=False) + +print(f"\n--- Selesai ---") +print(f"File '{OUTPUT_CSV_PATH}' berhasil dibuat.") +print("Silakan buka file tersebut dan isi kolom 'grade' dengan label yang benar.") \ No newline at end of file diff --git a/.history/src/generate_csv_20250923132415.py b/.history/src/generate_csv_20250923132415.py new file mode 100644 index 0000000000000000000000000000000000000000..ead56d15e921fcbb54bff49de0cd87cef4054647 --- /dev/null +++ b/.history/src/generate_csv_20250923132415.py @@ -0,0 +1,47 @@ +# generate_csv.py + +import os +import pandas as pd + +# --- KONFIGURASI --- +# Path ke dataset asli Anda yang berisi folder 01, 02, 03, ... 
+SOURCE_DATA_DIR = 'data/Nuclear Cataract Database for Biomedical and Machine Learning Applications\Nuclear Cataract Dataset' +# Nama file CSV yang akan dibuat +OUTPUT_CSV_PATH = 'outputs/csv/labels.csv' + +# --- SCRIPT UTAMA --- +image_paths = [] + +print(f"Memindai folder '{SOURCE_DATA_DIR}'...") + +# Berjalan melalui setiap folder pasien (01, 02, ...) +for patient_id in sorted(os.listdir(SOURCE_DATA_DIR)): + patient_path = os.path.join(SOURCE_DATA_DIR, patient_id) + + if os.path.isdir(patient_path): + # Berjalan melalui setiap subfolder mata (DER, IZQ) + for eye_folder in os.listdir(patient_path): + eye_path = os.path.join(patient_path, eye_folder) + + if os.path.isdir(eye_path): + # Cari semua file gambar di dalam folder mata + for filename in os.listdir(eye_path): + if filename.lower().endswith(('.png', '.jpg', '.jpeg')): + # Buat path relatif yang benar, misal: '01/DER/IM000001.png' + relative_path = os.path.join(patient_id, eye_folder, filename) + image_paths.append(relative_path) + +print(f"Total {len(image_paths)} gambar ditemukan.") + +# Buat DataFrame pandas +df = pd.DataFrame({'image': image_paths}) + +# Tambahkan kolom 'grade' dengan nilai placeholder +df['grade'] = '_LABEL_KOSONG_' + +# Simpan ke file CSV +df.to_csv(OUTPUT_CSV_PATH, index=False) + +print(f"\n--- Selesai ---") +print(f"File '{OUTPUT_CSV_PATH}' berhasil dibuat.") +print("Silakan buka file tersebut dan isi kolom 'grade' dengan label yang benar.") \ No newline at end of file diff --git a/.history/src/generate_csv_20250923132420.py b/.history/src/generate_csv_20250923132420.py new file mode 100644 index 0000000000000000000000000000000000000000..6f8c36af0d623f37086253c31216f5c5fdffd529 --- /dev/null +++ b/.history/src/generate_csv_20250923132420.py @@ -0,0 +1,47 @@ +# generate_csv.py + +import os +import pandas as pd + +# --- KONFIGURASI --- +# Path ke dataset asli Anda yang berisi folder 01, 02, 03, ... 
+SOURCE_DATA_DIR = 'data/Nuclear Cataract Database for Biomedical and Machine Learning Applications/Nuclear Cataract Dataset' +# Nama file CSV yang akan dibuat +OUTPUT_CSV_PATH = 'outputs/csv/labels.csv' + +# --- SCRIPT UTAMA --- +image_paths = [] + +print(f"Memindai folder '{SOURCE_DATA_DIR}'...") + +# Berjalan melalui setiap folder pasien (01, 02, ...) +for patient_id in sorted(os.listdir(SOURCE_DATA_DIR)): + patient_path = os.path.join(SOURCE_DATA_DIR, patient_id) + + if os.path.isdir(patient_path): + # Berjalan melalui setiap subfolder mata (DER, IZQ) + for eye_folder in os.listdir(patient_path): + eye_path = os.path.join(patient_path, eye_folder) + + if os.path.isdir(eye_path): + # Cari semua file gambar di dalam folder mata + for filename in os.listdir(eye_path): + if filename.lower().endswith(('.png', '.jpg', '.jpeg')): + # Buat path relatif yang benar, misal: '01/DER/IM000001.png' + relative_path = os.path.join(patient_id, eye_folder, filename) + image_paths.append(relative_path) + +print(f"Total {len(image_paths)} gambar ditemukan.") + +# Buat DataFrame pandas +df = pd.DataFrame({'image': image_paths}) + +# Tambahkan kolom 'grade' dengan nilai placeholder +df['grade'] = '_LABEL_KOSONG_' + +# Simpan ke file CSV +df.to_csv(OUTPUT_CSV_PATH, index=False) + +print(f"\n--- Selesai ---") +print(f"File '{OUTPUT_CSV_PATH}' berhasil dibuat.") +print("Silakan buka file tersebut dan isi kolom 'grade' dengan label yang benar.") \ No newline at end of file diff --git a/.history/src/generate_csv_20250923133744.py b/.history/src/generate_csv_20250923133744.py new file mode 100644 index 0000000000000000000000000000000000000000..8535b5d7b10799b0bd335c2e1d66e392fed261c4 --- /dev/null +++ b/.history/src/generate_csv_20250923133744.py @@ -0,0 +1,57 @@ +# generate_csv.py (Versi Final Otomatis) + +import os +import pandas as pd + +# --- KONFIGURASI --- +# Path ke dataset asli Anda yang berisi folder 01, 02, ... 
+# Dijalankan dari folder utama proyek (Cataract-ViT) +SOURCE_DATA_DIR = 'Nuclear Cataract Dataset' +OUTPUT_CSV_PATH = 'labels.csv' + +# --- SCRIPT UTAMA --- +all_image_data = [] + +print(f"Memindai folder '{SOURCE_DATA_DIR}' untuk membuat labels.csv secara otomatis...") + +# Berjalan melalui setiap folder pasien (01, 02, ...) +for patient_id in sorted(os.listdir(SOURCE_DATA_DIR)): + patient_path = os.path.join(SOURCE_DATA_DIR, patient_id) + + if os.path.isdir(patient_path): + # Berjalan melalui setiap subfolder mata (DER, IZQ) + for eye_folder in os.listdir(patient_path): + eye_path = os.path.join(patient_path, eye_folder) + + if os.path.isdir(eye_path): + # Path ke file DATAFILE yang berisi grade + datafile_path = os.path.join(eye_path, 'DATAFILE') + + try: + # Baca grade dari DATAFILE + with open(datafile_path, 'r') as f: + grade = f.read().strip() # .strip() untuk menghapus spasi/baris baru + + # Cari semua file gambar di dalam folder mata + for filename in os.listdir(eye_path): + if filename.lower().endswith(('.png', '.jpg', '.jpeg')): + # Buat path relatif, misal: '01/DER/IM000001.png' + relative_path = os.path.join(patient_id, eye_folder, filename) + # Tambahkan data ke daftar + all_image_data.append({'image': relative_path, 'grade': grade}) + + except FileNotFoundError: + print(f"Peringatan: Tidak ada DATAFILE di folder {eye_path}") + except Exception as e: + print(f"Error saat memproses folder {eye_path}: {e}") + +print(f"\nTotal {len(all_image_data)} gambar ditemukan dan diberi label.") + +# Buat DataFrame pandas dari daftar data +df = pd.DataFrame(all_image_data) + +# Simpan ke file CSV +df.to_csv(OUTPUT_CSV_PATH, index=False) + +print(f"\n--- Selesai ---") +print(f"File '{OUTPUT_CSV_PATH}' yang sudah terisi lengkap berhasil dibuat.") \ No newline at end of file diff --git a/.history/src/generate_csv_20250923133802.py b/.history/src/generate_csv_20250923133802.py new file mode 100644 index 
0000000000000000000000000000000000000000..f4422386e51fbfa44b0b9aaa4c9a2647005290ca --- /dev/null +++ b/.history/src/generate_csv_20250923133802.py @@ -0,0 +1,113 @@ +# generate_csv.py (Versi Final Otomatis) + +import os +import pandas as pd + +# --- KONFIGURASI --- +# Path ke dataset asli Anda yang berisi folder 01, 02, ... +# Dijalankan dari folder utama proyek (Cataract-ViT) +SOURCE_DATA_DIR = '# generate_csv.py (Versi Final Otomatis) + +import os +import pandas as pd + +# --- KONFIGURASI --- +# Path ke dataset asli Anda yang berisi folder 01, 02, ... +# Dijalankan dari folder utama proyek (Cataract-ViT) +SOURCE_DATA_DIR = 'Nuclear Cataract Dataset' +OUTPUT_CSV_PATH = 'labels.csv' + +# --- SCRIPT UTAMA --- +all_image_data = [] + +print(f"Memindai folder '{SOURCE_DATA_DIR}' untuk membuat labels.csv secara otomatis...") + +# Berjalan melalui setiap folder pasien (01, 02, ...) +for patient_id in sorted(os.listdir(SOURCE_DATA_DIR)): + patient_path = os.path.join(SOURCE_DATA_DIR, patient_id) + + if os.path.isdir(patient_path): + # Berjalan melalui setiap subfolder mata (DER, IZQ) + for eye_folder in os.listdir(patient_path): + eye_path = os.path.join(patient_path, eye_folder) + + if os.path.isdir(eye_path): + # Path ke file DATAFILE yang berisi grade + datafile_path = os.path.join(eye_path, 'DATAFILE') + + try: + # Baca grade dari DATAFILE + with open(datafile_path, 'r') as f: + grade = f.read().strip() # .strip() untuk menghapus spasi/baris baru + + # Cari semua file gambar di dalam folder mata + for filename in os.listdir(eye_path): + if filename.lower().endswith(('.png', '.jpg', '.jpeg')): + # Buat path relatif, misal: '01/DER/IM000001.png' + relative_path = os.path.join(patient_id, eye_folder, filename) + # Tambahkan data ke daftar + all_image_data.append({'image': relative_path, 'grade': grade}) + + except FileNotFoundError: + print(f"Peringatan: Tidak ada DATAFILE di folder {eye_path}") + except Exception as e: + print(f"Error saat memproses folder {eye_path}: 
{e}") + +print(f"\nTotal {len(all_image_data)} gambar ditemukan dan diberi label.") + +# Buat DataFrame pandas dari daftar data +df = pd.DataFrame(all_image_data) + +# Simpan ke file CSV +df.to_csv(OUTPUT_CSV_PATH, index=False) + +print(f"\n--- Selesai ---") +print(f"File '{OUTPUT_CSV_PATH}' yang sudah terisi lengkap berhasil dibuat.")' +OUTPUT_CSV_PATH = 'labels.csv' + +# --- SCRIPT UTAMA --- +all_image_data = [] + +print(f"Memindai folder '{SOURCE_DATA_DIR}' untuk membuat labels.csv secara otomatis...") + +# Berjalan melalui setiap folder pasien (01, 02, ...) +for patient_id in sorted(os.listdir(SOURCE_DATA_DIR)): + patient_path = os.path.join(SOURCE_DATA_DIR, patient_id) + + if os.path.isdir(patient_path): + # Berjalan melalui setiap subfolder mata (DER, IZQ) + for eye_folder in os.listdir(patient_path): + eye_path = os.path.join(patient_path, eye_folder) + + if os.path.isdir(eye_path): + # Path ke file DATAFILE yang berisi grade + datafile_path = os.path.join(eye_path, 'DATAFILE') + + try: + # Baca grade dari DATAFILE + with open(datafile_path, 'r') as f: + grade = f.read().strip() # .strip() untuk menghapus spasi/baris baru + + # Cari semua file gambar di dalam folder mata + for filename in os.listdir(eye_path): + if filename.lower().endswith(('.png', '.jpg', '.jpeg')): + # Buat path relatif, misal: '01/DER/IM000001.png' + relative_path = os.path.join(patient_id, eye_folder, filename) + # Tambahkan data ke daftar + all_image_data.append({'image': relative_path, 'grade': grade}) + + except FileNotFoundError: + print(f"Peringatan: Tidak ada DATAFILE di folder {eye_path}") + except Exception as e: + print(f"Error saat memproses folder {eye_path}: {e}") + +print(f"\nTotal {len(all_image_data)} gambar ditemukan dan diberi label.") + +# Buat DataFrame pandas dari daftar data +df = pd.DataFrame(all_image_data) + +# Simpan ke file CSV +df.to_csv(OUTPUT_CSV_PATH, index=False) + +print(f"\n--- Selesai ---") +print(f"File '{OUTPUT_CSV_PATH}' yang sudah terisi lengkap 
berhasil dibuat.") \ No newline at end of file diff --git a/.history/src/generate_csv_20250923133805.py b/.history/src/generate_csv_20250923133805.py new file mode 100644 index 0000000000000000000000000000000000000000..8535b5d7b10799b0bd335c2e1d66e392fed261c4 --- /dev/null +++ b/.history/src/generate_csv_20250923133805.py @@ -0,0 +1,57 @@ +# generate_csv.py (Versi Final Otomatis) + +import os +import pandas as pd + +# --- KONFIGURASI --- +# Path ke dataset asli Anda yang berisi folder 01, 02, ... +# Dijalankan dari folder utama proyek (Cataract-ViT) +SOURCE_DATA_DIR = 'Nuclear Cataract Dataset' +OUTPUT_CSV_PATH = 'labels.csv' + +# --- SCRIPT UTAMA --- +all_image_data = [] + +print(f"Memindai folder '{SOURCE_DATA_DIR}' untuk membuat labels.csv secara otomatis...") + +# Berjalan melalui setiap folder pasien (01, 02, ...) +for patient_id in sorted(os.listdir(SOURCE_DATA_DIR)): + patient_path = os.path.join(SOURCE_DATA_DIR, patient_id) + + if os.path.isdir(patient_path): + # Berjalan melalui setiap subfolder mata (DER, IZQ) + for eye_folder in os.listdir(patient_path): + eye_path = os.path.join(patient_path, eye_folder) + + if os.path.isdir(eye_path): + # Path ke file DATAFILE yang berisi grade + datafile_path = os.path.join(eye_path, 'DATAFILE') + + try: + # Baca grade dari DATAFILE + with open(datafile_path, 'r') as f: + grade = f.read().strip() # .strip() untuk menghapus spasi/baris baru + + # Cari semua file gambar di dalam folder mata + for filename in os.listdir(eye_path): + if filename.lower().endswith(('.png', '.jpg', '.jpeg')): + # Buat path relatif, misal: '01/DER/IM000001.png' + relative_path = os.path.join(patient_id, eye_folder, filename) + # Tambahkan data ke daftar + all_image_data.append({'image': relative_path, 'grade': grade}) + + except FileNotFoundError: + print(f"Peringatan: Tidak ada DATAFILE di folder {eye_path}") + except Exception as e: + print(f"Error saat memproses folder {eye_path}: {e}") + +print(f"\nTotal {len(all_image_data)} gambar 
ditemukan dan diberi label.") + +# Buat DataFrame pandas dari daftar data +df = pd.DataFrame(all_image_data) + +# Simpan ke file CSV +df.to_csv(OUTPUT_CSV_PATH, index=False) + +print(f"\n--- Selesai ---") +print(f"File '{OUTPUT_CSV_PATH}' yang sudah terisi lengkap berhasil dibuat.") \ No newline at end of file diff --git a/.history/src/generate_csv_20250923133807.py b/.history/src/generate_csv_20250923133807.py new file mode 100644 index 0000000000000000000000000000000000000000..2a13d4f8df00b3d50899c937e644d4112f619446 --- /dev/null +++ b/.history/src/generate_csv_20250923133807.py @@ -0,0 +1,57 @@ +# generate_csv.py (Versi Final Otomatis) + +import os +import pandas as pd + +# --- KONFIGURASI --- +# Path ke dataset asli Anda yang berisi folder 01, 02, ... +# Dijalankan dari folder utama proyek (Cataract-ViT) +SOURCE_DATA_DIR = '' +OUTPUT_CSV_PATH = 'labels.csv' + +# --- SCRIPT UTAMA --- +all_image_data = [] + +print(f"Memindai folder '{SOURCE_DATA_DIR}' untuk membuat labels.csv secara otomatis...") + +# Berjalan melalui setiap folder pasien (01, 02, ...) 
+for patient_id in sorted(os.listdir(SOURCE_DATA_DIR)): + patient_path = os.path.join(SOURCE_DATA_DIR, patient_id) + + if os.path.isdir(patient_path): + # Berjalan melalui setiap subfolder mata (DER, IZQ) + for eye_folder in os.listdir(patient_path): + eye_path = os.path.join(patient_path, eye_folder) + + if os.path.isdir(eye_path): + # Path ke file DATAFILE yang berisi grade + datafile_path = os.path.join(eye_path, 'DATAFILE') + + try: + # Baca grade dari DATAFILE + with open(datafile_path, 'r') as f: + grade = f.read().strip() # .strip() untuk menghapus spasi/baris baru + + # Cari semua file gambar di dalam folder mata + for filename in os.listdir(eye_path): + if filename.lower().endswith(('.png', '.jpg', '.jpeg')): + # Buat path relatif, misal: '01/DER/IM000001.png' + relative_path = os.path.join(patient_id, eye_folder, filename) + # Tambahkan data ke daftar + all_image_data.append({'image': relative_path, 'grade': grade}) + + except FileNotFoundError: + print(f"Peringatan: Tidak ada DATAFILE di folder {eye_path}") + except Exception as e: + print(f"Error saat memproses folder {eye_path}: {e}") + +print(f"\nTotal {len(all_image_data)} gambar ditemukan dan diberi label.") + +# Buat DataFrame pandas dari daftar data +df = pd.DataFrame(all_image_data) + +# Simpan ke file CSV +df.to_csv(OUTPUT_CSV_PATH, index=False) + +print(f"\n--- Selesai ---") +print(f"File '{OUTPUT_CSV_PATH}' yang sudah terisi lengkap berhasil dibuat.") \ No newline at end of file diff --git a/.history/src/generate_csv_20250923133816.py b/.history/src/generate_csv_20250923133816.py new file mode 100644 index 0000000000000000000000000000000000000000..55c7e2313bcb4b8e5b79626915f0ca12a46f9d8b --- /dev/null +++ b/.history/src/generate_csv_20250923133816.py @@ -0,0 +1,57 @@ +# generate_csv.py (Versi Final Otomatis) + +import os +import pandas as pd + +# --- KONFIGURASI --- +# Path ke dataset asli Anda yang berisi folder 01, 02, ... 
+# Dijalankan dari folder utama proyek (Cataract-ViT) +SOURCE_DATA_DIR = 'data\Nuclear Cataract Database for Biomedical and Machine Learning Applications\Nuclear Cataract Dataset' +OUTPUT_CSV_PATH = 'labels.csv' + +# --- SCRIPT UTAMA --- +all_image_data = [] + +print(f"Memindai folder '{SOURCE_DATA_DIR}' untuk membuat labels.csv secara otomatis...") + +# Berjalan melalui setiap folder pasien (01, 02, ...) +for patient_id in sorted(os.listdir(SOURCE_DATA_DIR)): + patient_path = os.path.join(SOURCE_DATA_DIR, patient_id) + + if os.path.isdir(patient_path): + # Berjalan melalui setiap subfolder mata (DER, IZQ) + for eye_folder in os.listdir(patient_path): + eye_path = os.path.join(patient_path, eye_folder) + + if os.path.isdir(eye_path): + # Path ke file DATAFILE yang berisi grade + datafile_path = os.path.join(eye_path, 'DATAFILE') + + try: + # Baca grade dari DATAFILE + with open(datafile_path, 'r') as f: + grade = f.read().strip() # .strip() untuk menghapus spasi/baris baru + + # Cari semua file gambar di dalam folder mata + for filename in os.listdir(eye_path): + if filename.lower().endswith(('.png', '.jpg', '.jpeg')): + # Buat path relatif, misal: '01/DER/IM000001.png' + relative_path = os.path.join(patient_id, eye_folder, filename) + # Tambahkan data ke daftar + all_image_data.append({'image': relative_path, 'grade': grade}) + + except FileNotFoundError: + print(f"Peringatan: Tidak ada DATAFILE di folder {eye_path}") + except Exception as e: + print(f"Error saat memproses folder {eye_path}: {e}") + +print(f"\nTotal {len(all_image_data)} gambar ditemukan dan diberi label.") + +# Buat DataFrame pandas dari daftar data +df = pd.DataFrame(all_image_data) + +# Simpan ke file CSV +df.to_csv(OUTPUT_CSV_PATH, index=False) + +print(f"\n--- Selesai ---") +print(f"File '{OUTPUT_CSV_PATH}' yang sudah terisi lengkap berhasil dibuat.") \ No newline at end of file diff --git a/.history/src/generate_csv_20250923133820.py b/.history/src/generate_csv_20250923133820.py new file 
mode 100644 index 0000000000000000000000000000000000000000..f083d01dc0e024c713a831755625bbf56e7916cd --- /dev/null +++ b/.history/src/generate_csv_20250923133820.py @@ -0,0 +1,57 @@ +# generate_csv.py (Versi Final Otomatis) + +import os +import pandas as pd + +# --- KONFIGURASI --- +# Path ke dataset asli Anda yang berisi folder 01, 02, ... +# Dijalankan dari folder utama proyek (Cataract-ViT) +SOURCE_DATA_DIR = 'data/Nuclear Cataract Database for Biomedical and Machine Learning Applications\Nuclear Cataract Dataset' +OUTPUT_CSV_PATH = 'labels.csv' + +# --- SCRIPT UTAMA --- +all_image_data = [] + +print(f"Memindai folder '{SOURCE_DATA_DIR}' untuk membuat labels.csv secara otomatis...") + +# Berjalan melalui setiap folder pasien (01, 02, ...) +for patient_id in sorted(os.listdir(SOURCE_DATA_DIR)): + patient_path = os.path.join(SOURCE_DATA_DIR, patient_id) + + if os.path.isdir(patient_path): + # Berjalan melalui setiap subfolder mata (DER, IZQ) + for eye_folder in os.listdir(patient_path): + eye_path = os.path.join(patient_path, eye_folder) + + if os.path.isdir(eye_path): + # Path ke file DATAFILE yang berisi grade + datafile_path = os.path.join(eye_path, 'DATAFILE') + + try: + # Baca grade dari DATAFILE + with open(datafile_path, 'r') as f: + grade = f.read().strip() # .strip() untuk menghapus spasi/baris baru + + # Cari semua file gambar di dalam folder mata + for filename in os.listdir(eye_path): + if filename.lower().endswith(('.png', '.jpg', '.jpeg')): + # Buat path relatif, misal: '01/DER/IM000001.png' + relative_path = os.path.join(patient_id, eye_folder, filename) + # Tambahkan data ke daftar + all_image_data.append({'image': relative_path, 'grade': grade}) + + except FileNotFoundError: + print(f"Peringatan: Tidak ada DATAFILE di folder {eye_path}") + except Exception as e: + print(f"Error saat memproses folder {eye_path}: {e}") + +print(f"\nTotal {len(all_image_data)} gambar ditemukan dan diberi label.") + +# Buat DataFrame pandas dari daftar data +df = 
pd.DataFrame(all_image_data) + +# Simpan ke file CSV +df.to_csv(OUTPUT_CSV_PATH, index=False) + +print(f"\n--- Selesai ---") +print(f"File '{OUTPUT_CSV_PATH}' yang sudah terisi lengkap berhasil dibuat.") \ No newline at end of file diff --git a/.history/src/generate_csv_20250923133830.py b/.history/src/generate_csv_20250923133830.py new file mode 100644 index 0000000000000000000000000000000000000000..5157ab2f89bea88a3a0d8c873423537702f3f6fb --- /dev/null +++ b/.history/src/generate_csv_20250923133830.py @@ -0,0 +1,57 @@ +# generate_csv.py (Versi Final Otomatis) + +import os +import pandas as pd + +# --- KONFIGURASI --- +# Path ke dataset asli Anda yang berisi folder 01, 02, ... +# Dijalankan dari folder utama proyek (Cataract-ViT) +SOURCE_DATA_DIR = 'data/Nuclear Cataract Database for Biomedical and Machine Learning Applications/Nuclear Cataract Dataset' +OUTPUT_CSV_PATH = 'labels.csv' + +# --- SCRIPT UTAMA --- +all_image_data = [] + +print(f"Memindai folder '{SOURCE_DATA_DIR}' untuk membuat labels.csv secara otomatis...") + +# Berjalan melalui setiap folder pasien (01, 02, ...) 
+for patient_id in sorted(os.listdir(SOURCE_DATA_DIR)): + patient_path = os.path.join(SOURCE_DATA_DIR, patient_id) + + if os.path.isdir(patient_path): + # Berjalan melalui setiap subfolder mata (DER, IZQ) + for eye_folder in os.listdir(patient_path): + eye_path = os.path.join(patient_path, eye_folder) + + if os.path.isdir(eye_path): + # Path ke file DATAFILE yang berisi grade + datafile_path = os.path.join(eye_path, 'DATAFILE') + + try: + # Baca grade dari DATAFILE + with open(datafile_path, 'r') as f: + grade = f.read().strip() # .strip() untuk menghapus spasi/baris baru + + # Cari semua file gambar di dalam folder mata + for filename in os.listdir(eye_path): + if filename.lower().endswith(('.png', '.jpg', '.jpeg')): + # Buat path relatif, misal: '01/DER/IM000001.png' + relative_path = os.path.join(patient_id, eye_folder, filename) + # Tambahkan data ke daftar + all_image_data.append({'image': relative_path, 'grade': grade}) + + except FileNotFoundError: + print(f"Peringatan: Tidak ada DATAFILE di folder {eye_path}") + except Exception as e: + print(f"Error saat memproses folder {eye_path}: {e}") + +print(f"\nTotal {len(all_image_data)} gambar ditemukan dan diberi label.") + +# Buat DataFrame pandas dari daftar data +df = pd.DataFrame(all_image_data) + +# Simpan ke file CSV +df.to_csv(OUTPUT_CSV_PATH, index=False) + +print(f"\n--- Selesai ---") +print(f"File '{OUTPUT_CSV_PATH}' yang sudah terisi lengkap berhasil dibuat.") \ No newline at end of file diff --git a/.history/src/generate_csv_20250923133839.py b/.history/src/generate_csv_20250923133839.py new file mode 100644 index 0000000000000000000000000000000000000000..76fdc992949cd7f0ae27beb4b2a1ea273cb9f9cf --- /dev/null +++ b/.history/src/generate_csv_20250923133839.py @@ -0,0 +1,57 @@ +# generate_csv.py (Versi Final Otomatis) + +import os +import pandas as pd + +# --- KONFIGURASI --- +# Path ke dataset asli Anda yang berisi folder 01, 02, ... 
+# Dijalankan dari folder utama proyek (Cataract-ViT) +SOURCE_DATA_DIR = 'data/Nuclear Cataract Database for Biomedical and Machine Learning Applications/Nuclear Cataract Dataset' +OUTPUT_CSV_PATH = 'outputs/csv/labels.csv' + +# --- SCRIPT UTAMA --- +all_image_data = [] + +print(f"Memindai folder '{SOURCE_DATA_DIR}' untuk membuat labels.csv secara otomatis...") + +# Berjalan melalui setiap folder pasien (01, 02, ...) +for patient_id in sorted(os.listdir(SOURCE_DATA_DIR)): + patient_path = os.path.join(SOURCE_DATA_DIR, patient_id) + + if os.path.isdir(patient_path): + # Berjalan melalui setiap subfolder mata (DER, IZQ) + for eye_folder in os.listdir(patient_path): + eye_path = os.path.join(patient_path, eye_folder) + + if os.path.isdir(eye_path): + # Path ke file DATAFILE yang berisi grade + datafile_path = os.path.join(eye_path, 'DATAFILE') + + try: + # Baca grade dari DATAFILE + with open(datafile_path, 'r') as f: + grade = f.read().strip() # .strip() untuk menghapus spasi/baris baru + + # Cari semua file gambar di dalam folder mata + for filename in os.listdir(eye_path): + if filename.lower().endswith(('.png', '.jpg', '.jpeg')): + # Buat path relatif, misal: '01/DER/IM000001.png' + relative_path = os.path.join(patient_id, eye_folder, filename) + # Tambahkan data ke daftar + all_image_data.append({'image': relative_path, 'grade': grade}) + + except FileNotFoundError: + print(f"Peringatan: Tidak ada DATAFILE di folder {eye_path}") + except Exception as e: + print(f"Error saat memproses folder {eye_path}: {e}") + +print(f"\nTotal {len(all_image_data)} gambar ditemukan dan diberi label.") + +# Buat DataFrame pandas dari daftar data +df = pd.DataFrame(all_image_data) + +# Simpan ke file CSV +df.to_csv(OUTPUT_CSV_PATH, index=False) + +print(f"\n--- Selesai ---") +print(f"File '{OUTPUT_CSV_PATH}' yang sudah terisi lengkap berhasil dibuat.") \ No newline at end of file diff --git a/.history/src/generate_csv_20250923133843.py b/.history/src/generate_csv_20250923133843.py 
new file mode 100644 index 0000000000000000000000000000000000000000..760ac6cb0699eb535daaf76c4a9be725afc10b52 --- /dev/null +++ b/.history/src/generate_csv_20250923133843.py @@ -0,0 +1,57 @@ +# generate_csv.py (Versi Final Otomatis) + +import os +import pandas as pd + +# --- KONFIGURASI --- +# Path ke dataset asli Anda yang berisi folder 01, 02, ... +# Dijalankan dari folder utama proyek (Cataract-ViT) +SOURCE_DATA_DIR = 'data/Nuclear Cataract Database for Biomedical and Machine Learning Applications/Nuclear Cataract Dataset' +OUTPUT_CSV_PATH = 'outputs/csv/label.csv' + +# --- SCRIPT UTAMA --- +all_image_data = [] + +print(f"Memindai folder '{SOURCE_DATA_DIR}' untuk membuat labels.csv secara otomatis...") + +# Berjalan melalui setiap folder pasien (01, 02, ...) +for patient_id in sorted(os.listdir(SOURCE_DATA_DIR)): + patient_path = os.path.join(SOURCE_DATA_DIR, patient_id) + + if os.path.isdir(patient_path): + # Berjalan melalui setiap subfolder mata (DER, IZQ) + for eye_folder in os.listdir(patient_path): + eye_path = os.path.join(patient_path, eye_folder) + + if os.path.isdir(eye_path): + # Path ke file DATAFILE yang berisi grade + datafile_path = os.path.join(eye_path, 'DATAFILE') + + try: + # Baca grade dari DATAFILE + with open(datafile_path, 'r') as f: + grade = f.read().strip() # .strip() untuk menghapus spasi/baris baru + + # Cari semua file gambar di dalam folder mata + for filename in os.listdir(eye_path): + if filename.lower().endswith(('.png', '.jpg', '.jpeg')): + # Buat path relatif, misal: '01/DER/IM000001.png' + relative_path = os.path.join(patient_id, eye_folder, filename) + # Tambahkan data ke daftar + all_image_data.append({'image': relative_path, 'grade': grade}) + + except FileNotFoundError: + print(f"Peringatan: Tidak ada DATAFILE di folder {eye_path}") + except Exception as e: + print(f"Error saat memproses folder {eye_path}: {e}") + +print(f"\nTotal {len(all_image_data)} gambar ditemukan dan diberi label.") + +# Buat DataFrame pandas dari 
daftar data +df = pd.DataFrame(all_image_data) + +# Simpan ke file CSV +df.to_csv(OUTPUT_CSV_PATH, index=False) + +print(f"\n--- Selesai ---") +print(f"File '{OUTPUT_CSV_PATH}' yang sudah terisi lengkap berhasil dibuat.") \ No newline at end of file diff --git a/.history/src/generate_csv_20250923133847.py b/.history/src/generate_csv_20250923133847.py new file mode 100644 index 0000000000000000000000000000000000000000..cb576bbef6170a6ca1858946c163d2e668ae5719 --- /dev/null +++ b/.history/src/generate_csv_20250923133847.py @@ -0,0 +1,57 @@ +# generate_csv.py (Versi Final Otomatis) + +import os +import pandas as pd + +# --- KONFIGURASI --- +# Path ke dataset asli Anda yang berisi folder 01, 02, ... +# Dijalankan dari folder utama proyek (Cataract-ViT) +SOURCE_DATA_DIR = 'data/Nuclear Cataract Database for Biomedical and Machine Learning Applications/Nuclear Cataract Dataset' +OUTPUT_CSV_PATH = 'outputs/csv/labelsNew.csv' + +# --- SCRIPT UTAMA --- +all_image_data = [] + +print(f"Memindai folder '{SOURCE_DATA_DIR}' untuk membuat labels.csv secara otomatis...") + +# Berjalan melalui setiap folder pasien (01, 02, ...) 
+for patient_id in sorted(os.listdir(SOURCE_DATA_DIR)): + patient_path = os.path.join(SOURCE_DATA_DIR, patient_id) + + if os.path.isdir(patient_path): + # Berjalan melalui setiap subfolder mata (DER, IZQ) + for eye_folder in os.listdir(patient_path): + eye_path = os.path.join(patient_path, eye_folder) + + if os.path.isdir(eye_path): + # Path ke file DATAFILE yang berisi grade + datafile_path = os.path.join(eye_path, 'DATAFILE') + + try: + # Baca grade dari DATAFILE + with open(datafile_path, 'r') as f: + grade = f.read().strip() # .strip() untuk menghapus spasi/baris baru + + # Cari semua file gambar di dalam folder mata + for filename in os.listdir(eye_path): + if filename.lower().endswith(('.png', '.jpg', '.jpeg')): + # Buat path relatif, misal: '01/DER/IM000001.png' + relative_path = os.path.join(patient_id, eye_folder, filename) + # Tambahkan data ke daftar + all_image_data.append({'image': relative_path, 'grade': grade}) + + except FileNotFoundError: + print(f"Peringatan: Tidak ada DATAFILE di folder {eye_path}") + except Exception as e: + print(f"Error saat memproses folder {eye_path}: {e}") + +print(f"\nTotal {len(all_image_data)} gambar ditemukan dan diberi label.") + +# Buat DataFrame pandas dari daftar data +df = pd.DataFrame(all_image_data) + +# Simpan ke file CSV +df.to_csv(OUTPUT_CSV_PATH, index=False) + +print(f"\n--- Selesai ---") +print(f"File '{OUTPUT_CSV_PATH}' yang sudah terisi lengkap berhasil dibuat.") \ No newline at end of file diff --git a/.history/src/generate_csv_20250923134050.py b/.history/src/generate_csv_20250923134050.py new file mode 100644 index 0000000000000000000000000000000000000000..350b22a3f4d203f77321f23ab80c5727de3d4659 --- /dev/null +++ b/.history/src/generate_csv_20250923134050.py @@ -0,0 +1,57 @@ +# generate_csv.py (Versi Final Otomatis) + +import os +import pandas as pd + +# --- KONFIGURASI --- +# Path ke dataset asli Anda yang berisi folder 01, 02, ... 
+# Dijalankan dari folder utama proyek (Cataract-ViT) +SOURCE_DATA_DIR = 'data/Nuclear Cataract Database for Biomedical and Machine Learning Applications/Nuclear Cataract Dataset' +OUTPUT_CSV_PATH = 'outputs/csv/labelsNew.csv' + +# --- SCRIPT UTAMA --- +all_image_data = [] + +print(f"Memindai folder '{SOURCE_DATA_DIR}' untuk membuat labels.csv secara otomatis...") + +# Berjalan melalui setiap folder pasien (01, 02, ...) +for patient_id in sorted(os.listdir(SOURCE_DATA_DIR)): + patient_path = os.path.join(SOURCE_DATA_DIR, patient_id) + + if os.path.isdir(patient_path): + # Berjalan melalui setiap subfolder mata (DER, IZQ) + for eye_folder in os.listdir(patient_path): + eye_path = os.path.join(patient_path, eye_folder) + + if os.path.isdir(eye_path): + # Path ke file DATAFILE yang berisi grade + datafile_path = os.path.join(eye_path, 'DATAFILE') + + try: +# Baca grade dari DATAFILE +with open(datafile_path, 'r', encoding='utf-8') as f: + grade = f.read().strip() # .strip() untuk menghapus spasi/baris baru + + # Cari semua file gambar di dalam folder mata + for filename in os.listdir(eye_path): + if filename.lower().endswith(('.png', '.jpg', '.jpeg')): + # Buat path relatif, misal: '01/DER/IM000001.png' + relative_path = os.path.join(patient_id, eye_folder, filename) + # Tambahkan data ke daftar + all_image_data.append({'image': relative_path, 'grade': grade}) + + except FileNotFoundError: + print(f"Peringatan: Tidak ada DATAFILE di folder {eye_path}") + except Exception as e: + print(f"Error saat memproses folder {eye_path}: {e}") + +print(f"\nTotal {len(all_image_data)} gambar ditemukan dan diberi label.") + +# Buat DataFrame pandas dari daftar data +df = pd.DataFrame(all_image_data) + +# Simpan ke file CSV +df.to_csv(OUTPUT_CSV_PATH, index=False) + +print(f"\n--- Selesai ---") +print(f"File '{OUTPUT_CSV_PATH}' yang sudah terisi lengkap berhasil dibuat.") \ No newline at end of file diff --git a/.history/src/generate_csv_20250923134051.py 
b/.history/src/generate_csv_20250923134051.py new file mode 100644 index 0000000000000000000000000000000000000000..cb576bbef6170a6ca1858946c163d2e668ae5719 --- /dev/null +++ b/.history/src/generate_csv_20250923134051.py @@ -0,0 +1,57 @@ +# generate_csv.py (Versi Final Otomatis) + +import os +import pandas as pd + +# --- KONFIGURASI --- +# Path ke dataset asli Anda yang berisi folder 01, 02, ... +# Dijalankan dari folder utama proyek (Cataract-ViT) +SOURCE_DATA_DIR = 'data/Nuclear Cataract Database for Biomedical and Machine Learning Applications/Nuclear Cataract Dataset' +OUTPUT_CSV_PATH = 'outputs/csv/labelsNew.csv' + +# --- SCRIPT UTAMA --- +all_image_data = [] + +print(f"Memindai folder '{SOURCE_DATA_DIR}' untuk membuat labels.csv secara otomatis...") + +# Berjalan melalui setiap folder pasien (01, 02, ...) +for patient_id in sorted(os.listdir(SOURCE_DATA_DIR)): + patient_path = os.path.join(SOURCE_DATA_DIR, patient_id) + + if os.path.isdir(patient_path): + # Berjalan melalui setiap subfolder mata (DER, IZQ) + for eye_folder in os.listdir(patient_path): + eye_path = os.path.join(patient_path, eye_folder) + + if os.path.isdir(eye_path): + # Path ke file DATAFILE yang berisi grade + datafile_path = os.path.join(eye_path, 'DATAFILE') + + try: + # Baca grade dari DATAFILE + with open(datafile_path, 'r') as f: + grade = f.read().strip() # .strip() untuk menghapus spasi/baris baru + + # Cari semua file gambar di dalam folder mata + for filename in os.listdir(eye_path): + if filename.lower().endswith(('.png', '.jpg', '.jpeg')): + # Buat path relatif, misal: '01/DER/IM000001.png' + relative_path = os.path.join(patient_id, eye_folder, filename) + # Tambahkan data ke daftar + all_image_data.append({'image': relative_path, 'grade': grade}) + + except FileNotFoundError: + print(f"Peringatan: Tidak ada DATAFILE di folder {eye_path}") + except Exception as e: + print(f"Error saat memproses folder {eye_path}: {e}") + +print(f"\nTotal {len(all_image_data)} gambar ditemukan dan 
diberi label.") + +# Buat DataFrame pandas dari daftar data +df = pd.DataFrame(all_image_data) + +# Simpan ke file CSV +df.to_csv(OUTPUT_CSV_PATH, index=False) + +print(f"\n--- Selesai ---") +print(f"File '{OUTPUT_CSV_PATH}' yang sudah terisi lengkap berhasil dibuat.") \ No newline at end of file diff --git a/.history/src/generate_csv_20250923134101.py b/.history/src/generate_csv_20250923134101.py new file mode 100644 index 0000000000000000000000000000000000000000..8bdbfc373a60bd90303b440da5ae4fe328a089e3 --- /dev/null +++ b/.history/src/generate_csv_20250923134101.py @@ -0,0 +1,57 @@ +# generate_csv.py (Versi Final Otomatis) + +import os +import pandas as pd + +# --- KONFIGURASI --- +# Path ke dataset asli Anda yang berisi folder 01, 02, ... +# Dijalankan dari folder utama proyek (Cataract-ViT) +SOURCE_DATA_DIR = 'data/Nuclear Cataract Database for Biomedical and Machine Learning Applications/Nuclear Cataract Dataset' +OUTPUT_CSV_PATH = 'outputs/csv/labelsNew.csv' + +# --- SCRIPT UTAMA --- +all_image_data = [] + +print(f"Memindai folder '{SOURCE_DATA_DIR}' untuk membuat labels.csv secara otomatis...") + +# Berjalan melalui setiap folder pasien (01, 02, ...) 
+for patient_id in sorted(os.listdir(SOURCE_DATA_DIR)): + patient_path = os.path.join(SOURCE_DATA_DIR, patient_id) + + if os.path.isdir(patient_path): + # Berjalan melalui setiap subfolder mata (DER, IZQ) + for eye_folder in os.listdir(patient_path): + eye_path = os.path.join(patient_path, eye_folder) + + if os.path.isdir(eye_path): + # Path ke file DATAFILE yang berisi grade + datafile_path = os.path.join(eye_path, 'DATAFILE') + + try: + # Baca grade dari DATAFILE + with open(datafile_path, 'r', encoding='utf-8') as f: + grade = f.read().strip() # .strip() untuk menghapus spasi/baris baru + + # Cari semua file gambar di dalam folder mata + for filename in os.listdir(eye_path): + if filename.lower().endswith(('.png', '.jpg', '.jpeg')): + # Buat path relatif, misal: '01/DER/IM000001.png' + relative_path = os.path.join(patient_id, eye_folder, filename) + # Tambahkan data ke daftar + all_image_data.append({'image': relative_path, 'grade': grade}) + + except FileNotFoundError: + print(f"Peringatan: Tidak ada DATAFILE di folder {eye_path}") + except Exception as e: + print(f"Error saat memproses folder {eye_path}: {e}") + +print(f"\nTotal {len(all_image_data)} gambar ditemukan dan diberi label.") + +# Buat DataFrame pandas dari daftar data +df = pd.DataFrame(all_image_data) + +# Simpan ke file CSV +df.to_csv(OUTPUT_CSV_PATH, index=False) + +print(f"\n--- Selesai ---") +print(f"File '{OUTPUT_CSV_PATH}' yang sudah terisi lengkap berhasil dibuat.") \ No newline at end of file diff --git a/.history/src/generate_csv_20250923134139.py b/.history/src/generate_csv_20250923134139.py new file mode 100644 index 0000000000000000000000000000000000000000..27207f90d55980c58df4ad2fafc391a030829cf9 --- /dev/null +++ b/.history/src/generate_csv_20250923134139.py @@ -0,0 +1,51 @@ +# generate_csv.py (Versi Final dengan Perbaikan Encoding) + +import os +import pandas as pd + +# --- KONFIGURASI --- +# Dijalankan dari folder utama proyek (Cataract-ViT) +SOURCE_DATA_DIR = 'Nuclear Cataract 
Dataset' +OUTPUT_CSV_PATH = 'labels.csv' + +# --- SCRIPT UTAMA --- +all_image_data = [] + +print(f"Memindai folder '{SOURCE_DATA_DIR}' untuk membuat labels.csv secara otomatis...") + +# Berjalan melalui setiap folder pasien (01, 02, ...) +for patient_id in sorted(os.listdir(SOURCE_DATA_DIR)): + patient_path = os.path.join(SOURCE_DATA_DIR, patient_id) + + if os.path.isdir(patient_path): + # Berjalan melalui setiap subfolder mata (DER, IZQ) + for eye_folder in os.listdir(patient_path): + eye_path = os.path.join(patient_path, eye_folder) + + if os.path.isdir(eye_path): + # Path ke file DATAFILE yang berisi grade + datafile_path = os.path.join(eye_path, 'DATAFILE') + + try: + # Baca grade dari DATAFILE dengan encoding UTF-8 + with open(datafile_path, 'r', encoding='utf-8') as f: + grade = f.read().strip() + + # Cari semua file gambar di dalam folder mata + for filename in os.listdir(eye_path): + if filename.lower().endswith(('.png', '.jpg', '.jpeg')): + relative_path = os.path.join(patient_id, eye_folder, filename) + all_image_data.append({'image': relative_path, 'grade': grade}) + + except FileNotFoundError: + print(f"Peringatan: Tidak ada DATAFILE di folder {eye_path}") + except Exception as e: + print(f"Error saat memproses folder {eye_path}: {e}") + +print(f"\nTotal {len(all_image_data)} gambar ditemukan dan diberi label.") + +df = pd.DataFrame(all_image_data) +df.to_csv(OUTPUT_CSV_PATH, index=False) + +print(f"\n--- Selesai ---") +print(f"File '{OUTPUT_CSV_PATH}' yang sudah terisi lengkap berhasil dibuat.") \ No newline at end of file diff --git a/.history/src/generate_csv_20250923134147.py b/.history/src/generate_csv_20250923134147.py new file mode 100644 index 0000000000000000000000000000000000000000..9c5be10b919788d038b9011ae194d4328bf69498 --- /dev/null +++ b/.history/src/generate_csv_20250923134147.py @@ -0,0 +1,51 @@ +# generate_csv.py (Versi Final dengan Perbaikan Encoding) + +import os +import pandas as pd + +# --- KONFIGURASI --- +# Dijalankan dari folder 
utama proyek (Cataract-ViT) +SOURCE_DATA_DIR = 'data/Nuclear Cataract Database for Biomedical and Machine Learning Applications/Nuclear Cataract Dataset' +OUTPUT_CSV_PATH = 'labels.csv' + +# --- SCRIPT UTAMA --- +all_image_data = [] + +print(f"Memindai folder '{SOURCE_DATA_DIR}' untuk membuat labels.csv secara otomatis...") + +# Berjalan melalui setiap folder pasien (01, 02, ...) +for patient_id in sorted(os.listdir(SOURCE_DATA_DIR)): + patient_path = os.path.join(SOURCE_DATA_DIR, patient_id) + + if os.path.isdir(patient_path): + # Berjalan melalui setiap subfolder mata (DER, IZQ) + for eye_folder in os.listdir(patient_path): + eye_path = os.path.join(patient_path, eye_folder) + + if os.path.isdir(eye_path): + # Path ke file DATAFILE yang berisi grade + datafile_path = os.path.join(eye_path, 'DATAFILE') + + try: + # Baca grade dari DATAFILE dengan encoding UTF-8 + with open(datafile_path, 'r', encoding='utf-8') as f: + grade = f.read().strip() + + # Cari semua file gambar di dalam folder mata + for filename in os.listdir(eye_path): + if filename.lower().endswith(('.png', '.jpg', '.jpeg')): + relative_path = os.path.join(patient_id, eye_folder, filename) + all_image_data.append({'image': relative_path, 'grade': grade}) + + except FileNotFoundError: + print(f"Peringatan: Tidak ada DATAFILE di folder {eye_path}") + except Exception as e: + print(f"Error saat memproses folder {eye_path}: {e}") + +print(f"\nTotal {len(all_image_data)} gambar ditemukan dan diberi label.") + +df = pd.DataFrame(all_image_data) +df.to_csv(OUTPUT_CSV_PATH, index=False) + +print(f"\n--- Selesai ---") +print(f"File '{OUTPUT_CSV_PATH}' yang sudah terisi lengkap berhasil dibuat.") \ No newline at end of file diff --git a/.history/src/generate_csv_20250923134155.py b/.history/src/generate_csv_20250923134155.py new file mode 100644 index 0000000000000000000000000000000000000000..07a4e55bf1e1808d4dd6b63e40b401f011867881 --- /dev/null +++ b/.history/src/generate_csv_20250923134155.py @@ -0,0 +1,51 
# generate_csv.py (final version with the encoding fix)
"""Build the image -> grade label CSV by scanning the dataset folder tree.

Expected layout under ``SOURCE_DATA_DIR``::

    <patient_id>/<eye_folder>/DATAFILE      text file holding the grade
    <patient_id>/<eye_folder>/<image files> .png / .jpg / .jpeg

Every image found is written as one CSV row with the columns ``image``
(path relative to ``SOURCE_DATA_DIR``) and ``grade``.
"""

import os

import pandas as pd

# --- CONFIGURATION ---
# Both paths are relative, so run the script from the project root (Cataract-ViT).
SOURCE_DATA_DIR = 'data/Nuclear Cataract Database for Biomedical and Machine Learning Applications/Nuclear Cataract Dataset'
# NOTE(review): 'lalabels.csv' looks like a typo for 'labels.csv' -- confirm
# before relying on this output name; it is kept unchanged here.
OUTPUT_CSV_PATH = 'outputs/lalabels.csv'

# File extensions (compared case-insensitively) that count as images.
IMAGE_EXTENSIONS = ('.png', '.jpg', '.jpeg')


def collect_image_labels(source_dir):
    """Return one ``{'image': ..., 'grade': ...}`` dict per image under *source_dir*.

    Args:
        source_dir: Root folder containing one sub-folder per patient.

    Returns:
        A list of dicts ordered by patient, eye folder and file name.
        ``image`` is ``'<patient>/<eye>/<file>'`` and always uses forward
        slashes, so code that splits the path on ``'/'`` to recover the
        patient id works on every platform. ``grade`` is the stripped text
        content of the eye folder's ``DATAFILE``.

    Raises:
        OSError: If *source_dir* or a patient folder cannot be listed.
            Problems inside a single eye folder are reported on stdout and
            that folder is skipped instead.
    """
    rows = []

    # Walk every patient folder (01, 02, ...).
    for patient_id in sorted(os.listdir(source_dir)):
        patient_path = os.path.join(source_dir, patient_id)
        if not os.path.isdir(patient_path):
            continue

        # Walk every eye sub-folder; sorted so the row order is reproducible.
        for eye_folder in sorted(os.listdir(patient_path)):
            eye_path = os.path.join(patient_path, eye_folder)
            if not os.path.isdir(eye_path):
                continue

            # DATAFILE holds the grade shared by all images of this eye.
            datafile_path = os.path.join(eye_path, 'DATAFILE')
            try:
                with open(datafile_path, 'r', encoding='utf-8') as f:
                    grade = f.read().strip()
                filenames = sorted(os.listdir(eye_path))
            except FileNotFoundError:
                print(f"Peringatan: Tidak ada DATAFILE di folder {eye_path}")
                continue
            except Exception as e:
                # Best effort: one unreadable folder must not abort the scan.
                print(f"Error saat memproses folder {eye_path}: {e}")
                continue

            for filename in filenames:
                if filename.lower().endswith(IMAGE_EXTENSIONS):
                    # Join with '/' explicitly instead of os.path.join so the
                    # CSV content does not depend on the operating system.
                    relative_path = '/'.join((patient_id, eye_folder, filename))
                    rows.append({'image': relative_path, 'grade': grade})

    return rows


def main():
    """Scan ``SOURCE_DATA_DIR`` and write the labels to ``OUTPUT_CSV_PATH``."""
    print(f"Memindai folder '{SOURCE_DATA_DIR}' untuk membuat labels.csv secara otomatis...")

    all_image_data = collect_image_labels(SOURCE_DATA_DIR)

    print(f"\nTotal {len(all_image_data)} gambar ditemukan dan diberi label.")

    # Explicit columns keep the CSV header present even when nothing was found.
    df = pd.DataFrame(all_image_data, columns=['image', 'grade'])

    # to_csv does not create missing parent folders.
    output_dir = os.path.dirname(OUTPUT_CSV_PATH)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)
    df.to_csv(OUTPUT_CSV_PATH, index=False)

    print(f"\n--- Selesai ---")
    print(f"File '{OUTPUT_CSV_PATH}' yang sudah terisi lengkap berhasil dibuat.")


if __name__ == "__main__":
    main()
# generate_csv.py (final version with the encoding fix)
"""Build the image -> grade label CSV by scanning the dataset folder tree.

Expected layout under ``SOURCE_DATA_DIR``::

    <patient_id>/<eye_folder>/DATAFILE      text file holding the grade
    <patient_id>/<eye_folder>/<image files> .png / .jpg / .jpeg

Every image found is written as one CSV row with the columns ``image``
(path relative to ``SOURCE_DATA_DIR``) and ``grade``.
"""

import os

import pandas as pd

# --- CONFIGURATION ---
# Both paths are relative, so run the script from the project root (Cataract-ViT).
SOURCE_DATA_DIR = 'data/Nuclear Cataract Database for Biomedical and Machine Learning Applications/Nuclear Cataract Dataset'
OUTPUT_CSV_PATH = 'outputs/csv/labels.csv'

# File extensions (compared case-insensitively) that count as images.
IMAGE_EXTENSIONS = ('.png', '.jpg', '.jpeg')


def collect_image_labels(source_dir):
    """Return one ``{'image': ..., 'grade': ...}`` dict per image under *source_dir*.

    Args:
        source_dir: Root folder containing one sub-folder per patient.

    Returns:
        A list of dicts ordered by patient, eye folder and file name.
        ``image`` is ``'<patient>/<eye>/<file>'`` and always uses forward
        slashes, so code that splits the path on ``'/'`` to recover the
        patient id works on every platform. ``grade`` is the stripped text
        content of the eye folder's ``DATAFILE``.

    Raises:
        OSError: If *source_dir* or a patient folder cannot be listed.
            Problems inside a single eye folder are reported on stdout and
            that folder is skipped instead.
    """
    rows = []

    # Walk every patient folder (01, 02, ...).
    for patient_id in sorted(os.listdir(source_dir)):
        patient_path = os.path.join(source_dir, patient_id)
        if not os.path.isdir(patient_path):
            continue

        # Walk every eye sub-folder; sorted so the row order is reproducible.
        for eye_folder in sorted(os.listdir(patient_path)):
            eye_path = os.path.join(patient_path, eye_folder)
            if not os.path.isdir(eye_path):
                continue

            # DATAFILE holds the grade shared by all images of this eye.
            datafile_path = os.path.join(eye_path, 'DATAFILE')
            try:
                with open(datafile_path, 'r', encoding='utf-8') as f:
                    grade = f.read().strip()
                filenames = sorted(os.listdir(eye_path))
            except FileNotFoundError:
                print(f"Peringatan: Tidak ada DATAFILE di folder {eye_path}")
                continue
            except Exception as e:
                # Best effort: one unreadable folder must not abort the scan.
                print(f"Error saat memproses folder {eye_path}: {e}")
                continue

            for filename in filenames:
                if filename.lower().endswith(IMAGE_EXTENSIONS):
                    # Join with '/' explicitly instead of os.path.join so the
                    # CSV content does not depend on the operating system.
                    relative_path = '/'.join((patient_id, eye_folder, filename))
                    rows.append({'image': relative_path, 'grade': grade})

    return rows


def main():
    """Scan ``SOURCE_DATA_DIR`` and write the labels to ``OUTPUT_CSV_PATH``."""
    print(f"Memindai folder '{SOURCE_DATA_DIR}' untuk membuat labels.csv secara otomatis...")

    all_image_data = collect_image_labels(SOURCE_DATA_DIR)

    print(f"\nTotal {len(all_image_data)} gambar ditemukan dan diberi label.")

    # Explicit columns keep the CSV header present even when nothing was found.
    df = pd.DataFrame(all_image_data, columns=['image', 'grade'])

    # to_csv does not create missing parent folders.
    output_dir = os.path.dirname(OUTPUT_CSV_PATH)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)
    df.to_csv(OUTPUT_CSV_PATH, index=False)

    print(f"\n--- Selesai ---")
    print(f"File '{OUTPUT_CSV_PATH}' yang sudah terisi lengkap berhasil dibuat.")


if __name__ == "__main__":
    main()
\ No newline at end of file diff --git a/.history/src/generate_csv_20250923134214.py b/.history/src/generate_csv_20250923134214.py new file mode 100644 index 0000000000000000000000000000000000000000..3643e1a9a41c1dbc739aa5ea539a811e2868af37 --- /dev/null +++ b/.history/src/generate_csv_20250923134214.py @@ -0,0 +1,51 @@ +# generate_csv.py (Versi Final dengan Perbaikan Encoding) + +import os +import pandas as pd + +# --- KONFIGURASI --- +# Dijalankan dari folder utama proyek (Cataract-ViT) +SOURCE_DATA_DIR = 'data/Nuclear Cataract Database for Biomedical and Machine Learning Applications/Nuclear Cataract Dataset' +OUTPUT_CSV_PATH = 'outputs/csv/labelsNew.csv' + +# --- SCRIPT UTAMA --- +all_image_data = [] + +print(f"Memindai folder '{SOURCE_DATA_DIR}' untuk membuat labels.csv secara otomatis...") + +# Berjalan melalui setiap folder pasien (01, 02, ...) +for patient_id in sorted(os.listdir(SOURCE_DATA_DIR)): + patient_path = os.path.join(SOURCE_DATA_DIR, patient_id) + + if os.path.isdir(patient_path): + # Berjalan melalui setiap subfolder mata (DER, IZQ) + for eye_folder in os.listdir(patient_path): + eye_path = os.path.join(patient_path, eye_folder) + + if os.path.isdir(eye_path): + # Path ke file DATAFILE yang berisi grade + datafile_path = os.path.join(eye_path, 'DATAFILE') + + try: + # Baca grade dari DATAFILE dengan encoding UTF-8 + with open(datafile_path, 'r', encoding='utf-8') as f: + grade = f.read().strip() + + # Cari semua file gambar di dalam folder mata + for filename in os.listdir(eye_path): + if filename.lower().endswith(('.png', '.jpg', '.jpeg')): + relative_path = os.path.join(patient_id, eye_folder, filename) + all_image_data.append({'image': relative_path, 'grade': grade}) + + except FileNotFoundError: + print(f"Peringatan: Tidak ada DATAFILE di folder {eye_path}") + except Exception as e: + print(f"Error saat memproses folder {eye_path}: {e}") + +print(f"\nTotal {len(all_image_data)} gambar ditemukan dan diberi label.") + +df = 
pd.DataFrame(all_image_data) +df.to_csv(OUTPUT_CSV_PATH, index=False) + +print(f"\n--- Selesai ---") +print(f"File '{OUTPUT_CSV_PATH}' yang sudah terisi lengkap berhasil dibuat.") \ No newline at end of file diff --git a/.history/src/generate_csv_20250923134328.py b/.history/src/generate_csv_20250923134328.py new file mode 100644 index 0000000000000000000000000000000000000000..f179f1d2870eead1c989e027c92f0b1d423a42e2 --- /dev/null +++ b/.history/src/generate_csv_20250923134328.py @@ -0,0 +1,49 @@ +# generate_csv.py (Versi Final dengan Pembacaan Biner) + +import os +import pandas as pd +import struct + +# --- KONFIGURASI --- +SOURCE_DATA_DIR = 'Nuclear Cataract Dataset' +OUTPUT_CSV_PATH = 'labels.csv' + +# --- SCRIPT UTAMA --- +all_image_data = [] + +print(f"Memindai folder '{SOURCE_DATA_DIR}' untuk membuat labels.csv secara otomatis...") + +for patient_id in sorted(os.listdir(SOURCE_DATA_DIR)): + patient_path = os.path.join(SOURCE_DATA_DIR, patient_id) + + if os.path.isdir(patient_path): + for eye_folder in os.listdir(patient_path): + eye_path = os.path.join(patient_path, eye_folder) + + if os.path.isdir(eye_path): + datafile_path = os.path.join(eye_path, 'DATAFILE') + + try: + # Baca grade dari DATAFILE dalam mode biner ('rb') + with open(datafile_path, 'rb') as f: + # Asumsikan grade adalah integer pertama dalam file biner + grade_bytes = f.read(4) # Baca 4 byte pertama untuk integer + grade = struct.unpack('= 4: + grade_as_integer = struct.unpack('= 8: + grade_as_double = struct.unpack('= 4: + grade_as_integer = struct.unpack('= 8: + grade_as_double = struct.unpack(' {source_file}") + +# Buat folder utama untuk data split +if os.path.exists(SPLIT_DATA_DIR): + shutil.rmtree(SPLIT_DATA_DIR) +os.makedirs(SPLIT_DATA_DIR) + +# Jalankan proses penyalinan +copy_files(train_patients, 'train') +copy_files(valid_patients, 'valid') + +print("\n--- Proses splitting per pasien selesai! 
---") +print(f"Dataset baru siap di folder '{SPLIT_DATA_DIR}' dengan struktur yang benar.") \ No newline at end of file diff --git a/.history/src/prepare_dataset_20250923135257.py b/.history/src/prepare_dataset_20250923135257.py new file mode 100644 index 0000000000000000000000000000000000000000..ddf2b97fc23c35de1391b305b99059d094573d2b --- /dev/null +++ b/.history/src/prepare_dataset_20250923135257.py @@ -0,0 +1,73 @@ +# prepare_dataset.py + +import os +import shutil +import pandas as pd +from sklearn.model_selection import train_test_split + +# --- KONFIGURASI --- +# Path ke file CSV yang sudah lengkap labelnya +CSV_PATH = 'outplabels.csv' +# Path ke dataset asli yang berisi folder 01, 02, ... +SOURCE_DATA_DIR = 'Nuclear Cataract Dataset' +# Path ke folder baru tempat data split akan disimpan +SPLIT_DATA_DIR = 'data' +# Rasio untuk data validasi (0.2 berarti 20%) +VALIDATION_SPLIT_RATIO = 0.2 + +# --- SCRIPT UTAMA --- +print("Membaca file metadata labels.csv...") +df = pd.read_csv(CSV_PATH) + +# Ekstrak ID pasien dari path gambar +# Misal: dari '01/DER/image_01.png' menjadi '01' +df['patient_id'] = df['image'].apply(lambda x: x.split('/')[0]) + +# Dapatkan daftar unik semua pasien +unique_patients = df['patient_id'].unique() +print(f"Total pasien ditemukan: {len(unique_patients)}") + +# Bagi daftar pasien menjadi train dan validation +train_patients, valid_patients = train_test_split( + unique_patients, + test_size=VALIDATION_SPLIT_RATIO, + random_state=42 # random_state untuk hasil yang bisa direproduksi +) + +print(f"Jumlah pasien untuk training: {len(train_patients)}") +print(f"Jumlah pasien untuk validasi: {len(valid_patients)}") + +# Fungsi untuk menyalin file +def copy_files(patient_list, target_folder): + target_path = os.path.join(SPLIT_DATA_DIR, target_folder) + subset_df = df[df['patient_id'].isin(patient_list)] + + print(f"\nMemproses {len(subset_df)} gambar untuk set '{target_folder}'...") + + for index, row in subset_df.iterrows(): + grade = 
str(row['grade']) + image_path = row['image'] + + class_folder = os.path.join(target_path, grade) + os.makedirs(class_folder, exist_ok=True) + + source_file = os.path.join(SOURCE_DATA_DIR, image_path) + destination_file = os.path.join(class_folder, os.path.basename(image_path)) + + # Cek apakah file sumber ada sebelum menyalin + if os.path.exists(source_file): + shutil.copyfile(source_file, destination_file) + else: + print(f" Peringatan: File sumber tidak ditemukan -> {source_file}") + +# Buat folder utama untuk data split +if os.path.exists(SPLIT_DATA_DIR): + shutil.rmtree(SPLIT_DATA_DIR) +os.makedirs(SPLIT_DATA_DIR) + +# Jalankan proses penyalinan +copy_files(train_patients, 'train') +copy_files(valid_patients, 'valid') + +print("\n--- Proses splitting per pasien selesai! ---") +print(f"Dataset baru siap di folder '{SPLIT_DATA_DIR}' dengan struktur yang benar.") \ No newline at end of file diff --git a/.history/src/prepare_dataset_20250923135300.py b/.history/src/prepare_dataset_20250923135300.py new file mode 100644 index 0000000000000000000000000000000000000000..b3a90c120f4cfb7561c7e86563db3664e3b7c09d --- /dev/null +++ b/.history/src/prepare_dataset_20250923135300.py @@ -0,0 +1,73 @@ +# prepare_dataset.py + +import os +import shutil +import pandas as pd +from sklearn.model_selection import train_test_split + +# --- KONFIGURASI --- +# Path ke file CSV yang sudah lengkap labelnya +CSV_PATH = 'outputs/labels.csv' +# Path ke dataset asli yang berisi folder 01, 02, ... 
+SOURCE_DATA_DIR = 'Nuclear Cataract Dataset' +# Path ke folder baru tempat data split akan disimpan +SPLIT_DATA_DIR = 'data' +# Rasio untuk data validasi (0.2 berarti 20%) +VALIDATION_SPLIT_RATIO = 0.2 + +# --- SCRIPT UTAMA --- +print("Membaca file metadata labels.csv...") +df = pd.read_csv(CSV_PATH) + +# Ekstrak ID pasien dari path gambar +# Misal: dari '01/DER/image_01.png' menjadi '01' +df['patient_id'] = df['image'].apply(lambda x: x.split('/')[0]) + +# Dapatkan daftar unik semua pasien +unique_patients = df['patient_id'].unique() +print(f"Total pasien ditemukan: {len(unique_patients)}") + +# Bagi daftar pasien menjadi train dan validation +train_patients, valid_patients = train_test_split( + unique_patients, + test_size=VALIDATION_SPLIT_RATIO, + random_state=42 # random_state untuk hasil yang bisa direproduksi +) + +print(f"Jumlah pasien untuk training: {len(train_patients)}") +print(f"Jumlah pasien untuk validasi: {len(valid_patients)}") + +# Fungsi untuk menyalin file +def copy_files(patient_list, target_folder): + target_path = os.path.join(SPLIT_DATA_DIR, target_folder) + subset_df = df[df['patient_id'].isin(patient_list)] + + print(f"\nMemproses {len(subset_df)} gambar untuk set '{target_folder}'...") + + for index, row in subset_df.iterrows(): + grade = str(row['grade']) + image_path = row['image'] + + class_folder = os.path.join(target_path, grade) + os.makedirs(class_folder, exist_ok=True) + + source_file = os.path.join(SOURCE_DATA_DIR, image_path) + destination_file = os.path.join(class_folder, os.path.basename(image_path)) + + # Cek apakah file sumber ada sebelum menyalin + if os.path.exists(source_file): + shutil.copyfile(source_file, destination_file) + else: + print(f" Peringatan: File sumber tidak ditemukan -> {source_file}") + +# Buat folder utama untuk data split +if os.path.exists(SPLIT_DATA_DIR): + shutil.rmtree(SPLIT_DATA_DIR) +os.makedirs(SPLIT_DATA_DIR) + +# Jalankan proses penyalinan +copy_files(train_patients, 'train') 
+copy_files(valid_patients, 'valid') + +print("\n--- Proses splitting per pasien selesai! ---") +print(f"Dataset baru siap di folder '{SPLIT_DATA_DIR}' dengan struktur yang benar.") \ No newline at end of file diff --git a/.history/src/prepare_dataset_20250923135303.py b/.history/src/prepare_dataset_20250923135303.py new file mode 100644 index 0000000000000000000000000000000000000000..5c10b57fca52fc77f0c6969b3d9792817fbbaa91 --- /dev/null +++ b/.history/src/prepare_dataset_20250923135303.py @@ -0,0 +1,73 @@ +# prepare_dataset.py + +import os +import shutil +import pandas as pd +from sklearn.model_selection import train_test_split + +# --- KONFIGURASI --- +# Path ke file CSV yang sudah lengkap labelnya +CSV_PATH = 'outputs/labelsNew.csv' +# Path ke dataset asli yang berisi folder 01, 02, ... +SOURCE_DATA_DIR = 'Nuclear Cataract Dataset' +# Path ke folder baru tempat data split akan disimpan +SPLIT_DATA_DIR = 'data' +# Rasio untuk data validasi (0.2 berarti 20%) +VALIDATION_SPLIT_RATIO = 0.2 + +# --- SCRIPT UTAMA --- +print("Membaca file metadata labels.csv...") +df = pd.read_csv(CSV_PATH) + +# Ekstrak ID pasien dari path gambar +# Misal: dari '01/DER/image_01.png' menjadi '01' +df['patient_id'] = df['image'].apply(lambda x: x.split('/')[0]) + +# Dapatkan daftar unik semua pasien +unique_patients = df['patient_id'].unique() +print(f"Total pasien ditemukan: {len(unique_patients)}") + +# Bagi daftar pasien menjadi train dan validation +train_patients, valid_patients = train_test_split( + unique_patients, + test_size=VALIDATION_SPLIT_RATIO, + random_state=42 # random_state untuk hasil yang bisa direproduksi +) + +print(f"Jumlah pasien untuk training: {len(train_patients)}") +print(f"Jumlah pasien untuk validasi: {len(valid_patients)}") + +# Fungsi untuk menyalin file +def copy_files(patient_list, target_folder): + target_path = os.path.join(SPLIT_DATA_DIR, target_folder) + subset_df = df[df['patient_id'].isin(patient_list)] + + print(f"\nMemproses {len(subset_df)} 
gambar untuk set '{target_folder}'...") + + for index, row in subset_df.iterrows(): + grade = str(row['grade']) + image_path = row['image'] + + class_folder = os.path.join(target_path, grade) + os.makedirs(class_folder, exist_ok=True) + + source_file = os.path.join(SOURCE_DATA_DIR, image_path) + destination_file = os.path.join(class_folder, os.path.basename(image_path)) + + # Cek apakah file sumber ada sebelum menyalin + if os.path.exists(source_file): + shutil.copyfile(source_file, destination_file) + else: + print(f" Peringatan: File sumber tidak ditemukan -> {source_file}") + +# Buat folder utama untuk data split +if os.path.exists(SPLIT_DATA_DIR): + shutil.rmtree(SPLIT_DATA_DIR) +os.makedirs(SPLIT_DATA_DIR) + +# Jalankan proses penyalinan +copy_files(train_patients, 'train') +copy_files(valid_patients, 'valid') + +print("\n--- Proses splitting per pasien selesai! ---") +print(f"Dataset baru siap di folder '{SPLIT_DATA_DIR}' dengan struktur yang benar.") \ No newline at end of file diff --git a/.history/src/prepare_dataset_20250923135316.py b/.history/src/prepare_dataset_20250923135316.py new file mode 100644 index 0000000000000000000000000000000000000000..4eb6f347b0bc53bd4d39e2776725494732e065ba --- /dev/null +++ b/.history/src/prepare_dataset_20250923135316.py @@ -0,0 +1,74 @@ +# prepare_dataset.py + +import os +import shutil +import pandas as pd +from sklearn.model_selection import train_test_split + +# --- KONFIGURASI --- +# Path ke file CSV yang sudah lengkap labelnya +CSV_PATH = 'outputs/labelsNew.csv' +# Path ke dataset asli yang berisi folder 01, 02, ... 
+SOURCE_DATA_DIR = 'data/Nuclear Cataract Database for Biomedical and Machine Learning Applications/Nuclear Cataract Dataset +' +# Path ke folder baru tempat data split akan disimpan +SPLIT_DATA_DIR = 'data' +# Rasio untuk data validasi (0.2 berarti 20%) +VALIDATION_SPLIT_RATIO = 0.2 + +# --- SCRIPT UTAMA --- +print("Membaca file metadata labels.csv...") +df = pd.read_csv(CSV_PATH) + +# Ekstrak ID pasien dari path gambar +# Misal: dari '01/DER/image_01.png' menjadi '01' +df['patient_id'] = df['image'].apply(lambda x: x.split('/')[0]) + +# Dapatkan daftar unik semua pasien +unique_patients = df['patient_id'].unique() +print(f"Total pasien ditemukan: {len(unique_patients)}") + +# Bagi daftar pasien menjadi train dan validation +train_patients, valid_patients = train_test_split( + unique_patients, + test_size=VALIDATION_SPLIT_RATIO, + random_state=42 # random_state untuk hasil yang bisa direproduksi +) + +print(f"Jumlah pasien untuk training: {len(train_patients)}") +print(f"Jumlah pasien untuk validasi: {len(valid_patients)}") + +# Fungsi untuk menyalin file +def copy_files(patient_list, target_folder): + target_path = os.path.join(SPLIT_DATA_DIR, target_folder) + subset_df = df[df['patient_id'].isin(patient_list)] + + print(f"\nMemproses {len(subset_df)} gambar untuk set '{target_folder}'...") + + for index, row in subset_df.iterrows(): + grade = str(row['grade']) + image_path = row['image'] + + class_folder = os.path.join(target_path, grade) + os.makedirs(class_folder, exist_ok=True) + + source_file = os.path.join(SOURCE_DATA_DIR, image_path) + destination_file = os.path.join(class_folder, os.path.basename(image_path)) + + # Cek apakah file sumber ada sebelum menyalin + if os.path.exists(source_file): + shutil.copyfile(source_file, destination_file) + else: + print(f" Peringatan: File sumber tidak ditemukan -> {source_file}") + +# Buat folder utama untuk data split +if os.path.exists(SPLIT_DATA_DIR): + shutil.rmtree(SPLIT_DATA_DIR) +os.makedirs(SPLIT_DATA_DIR) + +# 
Jalankan proses penyalinan +copy_files(train_patients, 'train') +copy_files(valid_patients, 'valid') + +print("\n--- Proses splitting per pasien selesai! ---") +print(f"Dataset baru siap di folder '{SPLIT_DATA_DIR}' dengan struktur yang benar.") \ No newline at end of file diff --git a/.history/src/prepare_dataset_20250923135318.py b/.history/src/prepare_dataset_20250923135318.py new file mode 100644 index 0000000000000000000000000000000000000000..076375a48c20b4795e9833af3aad1e65ffc8f6c7 --- /dev/null +++ b/.history/src/prepare_dataset_20250923135318.py @@ -0,0 +1,73 @@ +# prepare_dataset.py + +import os +import shutil +import pandas as pd +from sklearn.model_selection import train_test_split + +# --- KONFIGURASI --- +# Path ke file CSV yang sudah lengkap labelnya +CSV_PATH = 'outputs/labelsNew.csv' +# Path ke dataset asli yang berisi folder 01, 02, ... +SOURCE_DATA_DIR = 'data/Nuclear Cataract Database for Biomedical and Machine Learning Applications/Nuclear Cataract Dataset' +# Path ke folder baru tempat data split akan disimpan +SPLIT_DATA_DIR = 'data' +# Rasio untuk data validasi (0.2 berarti 20%) +VALIDATION_SPLIT_RATIO = 0.2 + +# --- SCRIPT UTAMA --- +print("Membaca file metadata labels.csv...") +df = pd.read_csv(CSV_PATH) + +# Ekstrak ID pasien dari path gambar +# Misal: dari '01/DER/image_01.png' menjadi '01' +df['patient_id'] = df['image'].apply(lambda x: x.split('/')[0]) + +# Dapatkan daftar unik semua pasien +unique_patients = df['patient_id'].unique() +print(f"Total pasien ditemukan: {len(unique_patients)}") + +# Bagi daftar pasien menjadi train dan validation +train_patients, valid_patients = train_test_split( + unique_patients, + test_size=VALIDATION_SPLIT_RATIO, + random_state=42 # random_state untuk hasil yang bisa direproduksi +) + +print(f"Jumlah pasien untuk training: {len(train_patients)}") +print(f"Jumlah pasien untuk validasi: {len(valid_patients)}") + +# Fungsi untuk menyalin file +def copy_files(patient_list, target_folder): + target_path = 
os.path.join(SPLIT_DATA_DIR, target_folder) + subset_df = df[df['patient_id'].isin(patient_list)] + + print(f"\nMemproses {len(subset_df)} gambar untuk set '{target_folder}'...") + + for index, row in subset_df.iterrows(): + grade = str(row['grade']) + image_path = row['image'] + + class_folder = os.path.join(target_path, grade) + os.makedirs(class_folder, exist_ok=True) + + source_file = os.path.join(SOURCE_DATA_DIR, image_path) + destination_file = os.path.join(class_folder, os.path.basename(image_path)) + + # Cek apakah file sumber ada sebelum menyalin + if os.path.exists(source_file): + shutil.copyfile(source_file, destination_file) + else: + print(f" Peringatan: File sumber tidak ditemukan -> {source_file}") + +# Buat folder utama untuk data split +if os.path.exists(SPLIT_DATA_DIR): + shutil.rmtree(SPLIT_DATA_DIR) +os.makedirs(SPLIT_DATA_DIR) + +# Jalankan proses penyalinan +copy_files(train_patients, 'train') +copy_files(valid_patients, 'valid') + +print("\n--- Proses splitting per pasien selesai! ---") +print(f"Dataset baru siap di folder '{SPLIT_DATA_DIR}' dengan struktur yang benar.") \ No newline at end of file diff --git a/.history/src/prepare_dataset_20250923135415.py b/.history/src/prepare_dataset_20250923135415.py new file mode 100644 index 0000000000000000000000000000000000000000..66d0b3d5b0a95449d35ddb1cf971c8b761ec4d89 --- /dev/null +++ b/.history/src/prepare_dataset_20250923135415.py @@ -0,0 +1,73 @@ +# prepare_dataset.py + +import os +import shutil +import pandas as pd +from sklearn.model_selection import train_test_split + +# --- KONFIGURASI --- +# Path ke file CSV yang sudah lengkap labelnya +CSV_PATH = 'outputs/labelsNew.csv' +# Path ke dataset asli yang berisi folder 01, 02, ... 
+SOURCE_DATA_DIR = 'data/Nuclear Cataract Database for Biomedical and Machine Learning Applications/Nuclear Cataract Dataset' +# Path ke folder baru tempat data split akan disimpan +SPLIT_DATA_DIR = 'data\split data' +# Rasio untuk data validasi (0.2 berarti 20%) +VALIDATION_SPLIT_RATIO = 0.2 + +# --- SCRIPT UTAMA --- +print("Membaca file metadata labels.csv...") +df = pd.read_csv(CSV_PATH) + +# Ekstrak ID pasien dari path gambar +# Misal: dari '01/DER/image_01.png' menjadi '01' +df['patient_id'] = df['image'].apply(lambda x: x.split('/')[0]) + +# Dapatkan daftar unik semua pasien +unique_patients = df['patient_id'].unique() +print(f"Total pasien ditemukan: {len(unique_patients)}") + +# Bagi daftar pasien menjadi train dan validation +train_patients, valid_patients = train_test_split( + unique_patients, + test_size=VALIDATION_SPLIT_RATIO, + random_state=42 # random_state untuk hasil yang bisa direproduksi +) + +print(f"Jumlah pasien untuk training: {len(train_patients)}") +print(f"Jumlah pasien untuk validasi: {len(valid_patients)}") + +# Fungsi untuk menyalin file +def copy_files(patient_list, target_folder): + target_path = os.path.join(SPLIT_DATA_DIR, target_folder) + subset_df = df[df['patient_id'].isin(patient_list)] + + print(f"\nMemproses {len(subset_df)} gambar untuk set '{target_folder}'...") + + for index, row in subset_df.iterrows(): + grade = str(row['grade']) + image_path = row['image'] + + class_folder = os.path.join(target_path, grade) + os.makedirs(class_folder, exist_ok=True) + + source_file = os.path.join(SOURCE_DATA_DIR, image_path) + destination_file = os.path.join(class_folder, os.path.basename(image_path)) + + # Cek apakah file sumber ada sebelum menyalin + if os.path.exists(source_file): + shutil.copyfile(source_file, destination_file) + else: + print(f" Peringatan: File sumber tidak ditemukan -> {source_file}") + +# Buat folder utama untuk data split +if os.path.exists(SPLIT_DATA_DIR): + shutil.rmtree(SPLIT_DATA_DIR) 
+os.makedirs(SPLIT_DATA_DIR) + +# Jalankan proses penyalinan +copy_files(train_patients, 'train') +copy_files(valid_patients, 'valid') + +print("\n--- Proses splitting per pasien selesai! ---") +print(f"Dataset baru siap di folder '{SPLIT_DATA_DIR}' dengan struktur yang benar.") \ No newline at end of file diff --git a/.history/src/prepare_dataset_20250923135417.py b/.history/src/prepare_dataset_20250923135417.py new file mode 100644 index 0000000000000000000000000000000000000000..81558425e9156012f844ee67b7df88f390030aa6 --- /dev/null +++ b/.history/src/prepare_dataset_20250923135417.py @@ -0,0 +1,73 @@ +# prepare_dataset.py + +import os +import shutil +import pandas as pd +from sklearn.model_selection import train_test_split + +# --- KONFIGURASI --- +# Path ke file CSV yang sudah lengkap labelnya +CSV_PATH = 'outputs/labelsNew.csv' +# Path ke dataset asli yang berisi folder 01, 02, ... +SOURCE_DATA_DIR = 'data/Nuclear Cataract Database for Biomedical and Machine Learning Applications/Nuclear Cataract Dataset' +# Path ke folder baru tempat data split akan disimpan +SPLIT_DATA_DIR = 'data/split data' +# Rasio untuk data validasi (0.2 berarti 20%) +VALIDATION_SPLIT_RATIO = 0.2 + +# --- SCRIPT UTAMA --- +print("Membaca file metadata labels.csv...") +df = pd.read_csv(CSV_PATH) + +# Ekstrak ID pasien dari path gambar +# Misal: dari '01/DER/image_01.png' menjadi '01' +df['patient_id'] = df['image'].apply(lambda x: x.split('/')[0]) + +# Dapatkan daftar unik semua pasien +unique_patients = df['patient_id'].unique() +print(f"Total pasien ditemukan: {len(unique_patients)}") + +# Bagi daftar pasien menjadi train dan validation +train_patients, valid_patients = train_test_split( + unique_patients, + test_size=VALIDATION_SPLIT_RATIO, + random_state=42 # random_state untuk hasil yang bisa direproduksi +) + +print(f"Jumlah pasien untuk training: {len(train_patients)}") +print(f"Jumlah pasien untuk validasi: {len(valid_patients)}") + +# Fungsi untuk menyalin file +def 
copy_files(patient_list, target_folder): + target_path = os.path.join(SPLIT_DATA_DIR, target_folder) + subset_df = df[df['patient_id'].isin(patient_list)] + + print(f"\nMemproses {len(subset_df)} gambar untuk set '{target_folder}'...") + + for index, row in subset_df.iterrows(): + grade = str(row['grade']) + image_path = row['image'] + + class_folder = os.path.join(target_path, grade) + os.makedirs(class_folder, exist_ok=True) + + source_file = os.path.join(SOURCE_DATA_DIR, image_path) + destination_file = os.path.join(class_folder, os.path.basename(image_path)) + + # Cek apakah file sumber ada sebelum menyalin + if os.path.exists(source_file): + shutil.copyfile(source_file, destination_file) + else: + print(f" Peringatan: File sumber tidak ditemukan -> {source_file}") + +# Buat folder utama untuk data split +if os.path.exists(SPLIT_DATA_DIR): + shutil.rmtree(SPLIT_DATA_DIR) +os.makedirs(SPLIT_DATA_DIR) + +# Jalankan proses penyalinan +copy_files(train_patients, 'train') +copy_files(valid_patients, 'valid') + +print("\n--- Proses splitting per pasien selesai! ---") +print(f"Dataset baru siap di folder '{SPLIT_DATA_DIR}' dengan struktur yang benar.") \ No newline at end of file diff --git a/.history/src/prepare_dataset_20250923135706.py b/.history/src/prepare_dataset_20250923135706.py new file mode 100644 index 0000000000000000000000000000000000000000..03902540608185f61a4b7a9f075b132f79fcf7a5 --- /dev/null +++ b/.history/src/prepare_dataset_20250923135706.py @@ -0,0 +1,73 @@ +# prepare_dataset.py + +import os +import shutil +import pandas as pd +from sklearn.model_selection import train_test_split + +# --- KONFIGURASI --- +# Path ke file CSV yang sudah lengkap labelnya +CSV_PATH = 'outputs/csv/labelsNew.csv' +# Path ke dataset asli yang berisi folder 01, 02, ... 
+SOURCE_DATA_DIR = 'data/Nuclear Cataract Database for Biomedical and Machine Learning Applications/Nuclear Cataract Dataset' +# Path ke folder baru tempat data split akan disimpan +SPLIT_DATA_DIR = 'data/split data' +# Rasio untuk data validasi (0.2 berarti 20%) +VALIDATION_SPLIT_RATIO = 0.2 + +# --- SCRIPT UTAMA --- +print("Membaca file metadata labels.csv...") +df = pd.read_csv(CSV_PATH) + +# Ekstrak ID pasien dari path gambar +# Misal: dari '01/DER/image_01.png' menjadi '01' +df['patient_id'] = df['image'].apply(lambda x: x.split('/')[0]) + +# Dapatkan daftar unik semua pasien +unique_patients = df['patient_id'].unique() +print(f"Total pasien ditemukan: {len(unique_patients)}") + +# Bagi daftar pasien menjadi train dan validation +train_patients, valid_patients = train_test_split( + unique_patients, + test_size=VALIDATION_SPLIT_RATIO, + random_state=42 # random_state untuk hasil yang bisa direproduksi +) + +print(f"Jumlah pasien untuk training: {len(train_patients)}") +print(f"Jumlah pasien untuk validasi: {len(valid_patients)}") + +# Fungsi untuk menyalin file +def copy_files(patient_list, target_folder): + target_path = os.path.join(SPLIT_DATA_DIR, target_folder) + subset_df = df[df['patient_id'].isin(patient_list)] + + print(f"\nMemproses {len(subset_df)} gambar untuk set '{target_folder}'...") + + for index, row in subset_df.iterrows(): + grade = str(row['grade']) + image_path = row['image'] + + class_folder = os.path.join(target_path, grade) + os.makedirs(class_folder, exist_ok=True) + + source_file = os.path.join(SOURCE_DATA_DIR, image_path) + destination_file = os.path.join(class_folder, os.path.basename(image_path)) + + # Cek apakah file sumber ada sebelum menyalin + if os.path.exists(source_file): + shutil.copyfile(source_file, destination_file) + else: + print(f" Peringatan: File sumber tidak ditemukan -> {source_file}") + +# Buat folder utama untuk data split +if os.path.exists(SPLIT_DATA_DIR): + shutil.rmtree(SPLIT_DATA_DIR) 
+os.makedirs(SPLIT_DATA_DIR) + +# Jalankan proses penyalinan +copy_files(train_patients, 'train') +copy_files(valid_patients, 'valid') + +print("\n--- Proses splitting per pasien selesai! ---") +print(f"Dataset baru siap di folder '{SPLIT_DATA_DIR}' dengan struktur yang benar.") \ No newline at end of file diff --git a/.history/src/prepare_dataset_20250923151225.py b/.history/src/prepare_dataset_20250923151225.py new file mode 100644 index 0000000000000000000000000000000000000000..70a5fffe6fee5a0b1e1d576f509ba4b5de73ae22 --- /dev/null +++ b/.history/src/prepare_dataset_20250923151225.py @@ -0,0 +1,73 @@ +# prepare_dataset.py + +import os +import shutil +import pandas as pd +from sklearn.model_selection import train_test_split + +# --- KONFIGURASI --- +# Path ke file CSV yang sudah lengkap labelnya +CSV_PATH = 'outputs/csv/labels.csv' +# Path ke dataset asli yang berisi folder 01, 02, ... +SOURCE_DATA_DIR = 'data/Nuclear Cataract Database for Biomedical and Machine Learning Applications/Nuclear Cataract Dataset' +# Path ke folder baru tempat data split akan disimpan +SPLIT_DATA_DIR = 'data/split data' +# Rasio untuk data validasi (0.2 berarti 20%) +VALIDATION_SPLIT_RATIO = 0.2 + +# --- SCRIPT UTAMA --- +print("Membaca file metadata labels.csv...") +df = pd.read_csv(CSV_PATH) + +# Ekstrak ID pasien dari path gambar +# Misal: dari '01/DER/image_01.png' menjadi '01' +df['patient_id'] = df['image'].apply(lambda x: x.split('/')[0]) + +# Dapatkan daftar unik semua pasien +unique_patients = df['patient_id'].unique() +print(f"Total pasien ditemukan: {len(unique_patients)}") + +# Bagi daftar pasien menjadi train dan validation +train_patients, valid_patients = train_test_split( + unique_patients, + test_size=VALIDATION_SPLIT_RATIO, + random_state=42 # random_state untuk hasil yang bisa direproduksi +) + +print(f"Jumlah pasien untuk training: {len(train_patients)}") +print(f"Jumlah pasien untuk validasi: {len(valid_patients)}") + +# Fungsi untuk menyalin file +def 
copy_files(patient_list, target_folder): + target_path = os.path.join(SPLIT_DATA_DIR, target_folder) + subset_df = df[df['patient_id'].isin(patient_list)] + + print(f"\nMemproses {len(subset_df)} gambar untuk set '{target_folder}'...") + + for index, row in subset_df.iterrows(): + grade = str(row['grade']) + image_path = row['image'] + + class_folder = os.path.join(target_path, grade) + os.makedirs(class_folder, exist_ok=True) + + source_file = os.path.join(SOURCE_DATA_DIR, image_path) + destination_file = os.path.join(class_folder, os.path.basename(image_path)) + + # Cek apakah file sumber ada sebelum menyalin + if os.path.exists(source_file): + shutil.copyfile(source_file, destination_file) + else: + print(f" Peringatan: File sumber tidak ditemukan -> {source_file}") + +# Buat folder utama untuk data split +if os.path.exists(SPLIT_DATA_DIR): + shutil.rmtree(SPLIT_DATA_DIR) +os.makedirs(SPLIT_DATA_DIR) + +# Jalankan proses penyalinan +copy_files(train_patients, 'train') +copy_files(valid_patients, 'valid') + +print("\n--- Proses splitting per pasien selesai! ---") +print(f"Dataset baru siap di folder '{SPLIT_DATA_DIR}' dengan struktur yang benar.") \ No newline at end of file diff --git a/.history/src/split_dataset_20250924190002.py b/.history/src/split_dataset_20250924190002.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/.history/src/split_dataset_20250924190007.py b/.history/src/split_dataset_20250924190007.py new file mode 100644 index 0000000000000000000000000000000000000000..77a0f7e0e5b84a086f078b3cc4adaef6856f7c71 --- /dev/null +++ b/.history/src/split_dataset_20250924190007.py @@ -0,0 +1,74 @@ +# split_data.py + +import os +import shutil +import random +import math + +# --- KONFIGURASI --- +# 1. Ganti 'original_dataset' dengan nama folder tempat Anda mengekstrak data Kaggle +SOURCE_DIR = 'original_dataset' +# 2. 
Ini adalah folder tujuan tempat 'train' dan 'valid' akan dibuat +TARGET_DIR = 'data' +# 3. Rasio pembagian (0.8 berarti 80% untuk training) +TRAIN_RATIO = 0.8 + +# --- SCRIPT UTAMA --- +print(f"Memulai proses split data dari folder '{SOURCE_DIR}'...") + +# Hapus folder target jika sudah ada untuk memulai dari awal +if os.path.exists(TARGET_DIR): + shutil.rmtree(TARGET_DIR) + +# Buat struktur folder target (train dan valid) +train_path = os.path.join(TARGET_DIR, 'train') +valid_path = os.path.join(TARGET_DIR, 'valid') +os.makedirs(train_path, exist_ok=True) +os.makedirs(valid_path, exist_ok=True) + +# Dapatkan daftar semua folder kelas di direktori sumber +try: + class_folders = [f for f in os.listdir(SOURCE_DIR) if os.path.isdir(os.path.join(SOURCE_DIR, f))] + if not class_folders: + raise FileNotFoundError +except FileNotFoundError: + print(f"!!! ERROR: Folder '{SOURCE_DIR}' tidak ditemukan atau kosong.") + print("Pastikan Anda sudah mengekstrak dataset Kaggle ke dalam folder tersebut.") + exit() + +print(f"Ditemukan {len(class_folders)} kelas: {class_folders}") + +# Loop melalui setiap folder kelas +for cls in class_folders: + source_class_path = os.path.join(SOURCE_DIR, cls) + + # Buat subfolder kelas di dalam train dan valid + train_class_path = os.path.join(train_path, cls) + valid_class_path = os.path.join(valid_path, cls) + os.makedirs(train_class_path, exist_ok=True) + os.makedirs(valid_class_path, exist_ok=True) + + # Dapatkan semua file gambar untuk kelas ini + images = [f for f in os.listdir(source_class_path) if f.lower().endswith(('.png', '.jpg', '.jpeg'))] + + # Acak urutan gambar + random.shuffle(images) + + # Hitung titik pembagian + split_point = math.floor(len(images) * TRAIN_RATIO) + + # Bagi daftar gambar menjadi train dan valid + train_images = images[:split_point] + valid_images = images[split_point:] + + print(f" Kelas '{cls}': {len(train_images)} train, {len(valid_images)} valid") + + # Salin file-file ke folder tujuan + for img in 
train_images: + shutil.copy(os.path.join(source_class_path, img), os.path.join(train_class_path, img)) + + for img in valid_images: + shutil.copy(os.path.join(source_class_path, img), os.path.join(valid_class_path, img)) + +print("\n--- Proses split data selesai! ---") +print(f"Folder '{TARGET_DIR}' dengan struktur 'train' dan 'valid' telah berhasil dibuat.") \ No newline at end of file diff --git a/.history/src/split_dataset_20250924192730.py b/.history/src/split_dataset_20250924192730.py new file mode 100644 index 0000000000000000000000000000000000000000..8dc514a68763e33be1b2c2743dd18bcebeaac95a --- /dev/null +++ b/.history/src/split_dataset_20250924192730.py @@ -0,0 +1,74 @@ +# split_data.py + +import os +import shutil +import random +import math + +# --- KONFIGURASI --- +# 1. Ganti 'original_dataset' dengan nama folder tempat Anda mengekstrak data Kaggle +SOURCE_DIR = 'data\dataset' +# 2. Ini adalah folder tujuan tempat 'train' dan 'valid' akan dibuat +TARGET_DIR = 'data' +# 3. Rasio pembagian (0.8 berarti 80% untuk training) +TRAIN_RATIO = 0.8 + +# --- SCRIPT UTAMA --- +print(f"Memulai proses split data dari folder '{SOURCE_DIR}'...") + +# Hapus folder target jika sudah ada untuk memulai dari awal +if os.path.exists(TARGET_DIR): + shutil.rmtree(TARGET_DIR) + +# Buat struktur folder target (train dan valid) +train_path = os.path.join(TARGET_DIR, 'train') +valid_path = os.path.join(TARGET_DIR, 'valid') +os.makedirs(train_path, exist_ok=True) +os.makedirs(valid_path, exist_ok=True) + +# Dapatkan daftar semua folder kelas di direktori sumber +try: + class_folders = [f for f in os.listdir(SOURCE_DIR) if os.path.isdir(os.path.join(SOURCE_DIR, f))] + if not class_folders: + raise FileNotFoundError +except FileNotFoundError: + print(f"!!! 
ERROR: Folder '{SOURCE_DIR}' tidak ditemukan atau kosong.") + print("Pastikan Anda sudah mengekstrak dataset Kaggle ke dalam folder tersebut.") + exit() + +print(f"Ditemukan {len(class_folders)} kelas: {class_folders}") + +# Loop melalui setiap folder kelas +for cls in class_folders: + source_class_path = os.path.join(SOURCE_DIR, cls) + + # Buat subfolder kelas di dalam train dan valid + train_class_path = os.path.join(train_path, cls) + valid_class_path = os.path.join(valid_path, cls) + os.makedirs(train_class_path, exist_ok=True) + os.makedirs(valid_class_path, exist_ok=True) + + # Dapatkan semua file gambar untuk kelas ini + images = [f for f in os.listdir(source_class_path) if f.lower().endswith(('.png', '.jpg', '.jpeg'))] + + # Acak urutan gambar + random.shuffle(images) + + # Hitung titik pembagian + split_point = math.floor(len(images) * TRAIN_RATIO) + + # Bagi daftar gambar menjadi train dan valid + train_images = images[:split_point] + valid_images = images[split_point:] + + print(f" Kelas '{cls}': {len(train_images)} train, {len(valid_images)} valid") + + # Salin file-file ke folder tujuan + for img in train_images: + shutil.copy(os.path.join(source_class_path, img), os.path.join(train_class_path, img)) + + for img in valid_images: + shutil.copy(os.path.join(source_class_path, img), os.path.join(valid_class_path, img)) + +print("\n--- Proses split data selesai! ---") +print(f"Folder '{TARGET_DIR}' dengan struktur 'train' dan 'valid' telah berhasil dibuat.") \ No newline at end of file diff --git a/.history/src/split_dataset_20250924192733.py b/.history/src/split_dataset_20250924192733.py new file mode 100644 index 0000000000000000000000000000000000000000..ae42090e1d08fa03823bd6b5d95d070861eb0177 --- /dev/null +++ b/.history/src/split_dataset_20250924192733.py @@ -0,0 +1,74 @@ +# split_data.py + +import os +import shutil +import random +import math + +# --- KONFIGURASI --- +# 1. 
Ganti 'original_dataset' dengan nama folder tempat Anda mengekstrak data Kaggle +SOURCE_DIR = 'data/dataset' +# 2. Ini adalah folder tujuan tempat 'train' dan 'valid' akan dibuat +TARGET_DIR = 'data' +# 3. Rasio pembagian (0.8 berarti 80% untuk training) +TRAIN_RATIO = 0.8 + +# --- SCRIPT UTAMA --- +print(f"Memulai proses split data dari folder '{SOURCE_DIR}'...") + +# Hapus folder target jika sudah ada untuk memulai dari awal +if os.path.exists(TARGET_DIR): + shutil.rmtree(TARGET_DIR) + +# Buat struktur folder target (train dan valid) +train_path = os.path.join(TARGET_DIR, 'train') +valid_path = os.path.join(TARGET_DIR, 'valid') +os.makedirs(train_path, exist_ok=True) +os.makedirs(valid_path, exist_ok=True) + +# Dapatkan daftar semua folder kelas di direktori sumber +try: + class_folders = [f for f in os.listdir(SOURCE_DIR) if os.path.isdir(os.path.join(SOURCE_DIR, f))] + if not class_folders: + raise FileNotFoundError +except FileNotFoundError: + print(f"!!! ERROR: Folder '{SOURCE_DIR}' tidak ditemukan atau kosong.") + print("Pastikan Anda sudah mengekstrak dataset Kaggle ke dalam folder tersebut.") + exit() + +print(f"Ditemukan {len(class_folders)} kelas: {class_folders}") + +# Loop melalui setiap folder kelas +for cls in class_folders: + source_class_path = os.path.join(SOURCE_DIR, cls) + + # Buat subfolder kelas di dalam train dan valid + train_class_path = os.path.join(train_path, cls) + valid_class_path = os.path.join(valid_path, cls) + os.makedirs(train_class_path, exist_ok=True) + os.makedirs(valid_class_path, exist_ok=True) + + # Dapatkan semua file gambar untuk kelas ini + images = [f for f in os.listdir(source_class_path) if f.lower().endswith(('.png', '.jpg', '.jpeg'))] + + # Acak urutan gambar + random.shuffle(images) + + # Hitung titik pembagian + split_point = math.floor(len(images) * TRAIN_RATIO) + + # Bagi daftar gambar menjadi train dan valid + train_images = images[:split_point] + valid_images = images[split_point:] + + print(f" Kelas '{cls}': 
{len(train_images)} train, {len(valid_images)} valid") + + # Salin file-file ke folder tujuan + for img in train_images: + shutil.copy(os.path.join(source_class_path, img), os.path.join(train_class_path, img)) + + for img in valid_images: + shutil.copy(os.path.join(source_class_path, img), os.path.join(valid_class_path, img)) + +print("\n--- Proses split data selesai! ---") +print(f"Folder '{TARGET_DIR}' dengan struktur 'train' dan 'valid' telah berhasil dibuat.") \ No newline at end of file diff --git a/.history/src/split_dataset_20250924192742.py b/.history/src/split_dataset_20250924192742.py new file mode 100644 index 0000000000000000000000000000000000000000..2eb0eb7b0707feb482552c8578b45b8fc1b29d36 --- /dev/null +++ b/.history/src/split_dataset_20250924192742.py @@ -0,0 +1,74 @@ +# split_data.py + +import os +import shutil +import random +import math + +# --- KONFIGURASI --- +# 1. Ganti 'original_dataset' dengan nama folder tempat Anda mengekstrak data Kaggle +SOURCE_DIR = 'data/dataset' +# 2. Ini adalah folder tujuan tempat 'train' dan 'valid' akan dibuat +TARGET_DIR = 'data/' +# 3. Rasio pembagian (0.8 berarti 80% untuk training) +TRAIN_RATIO = 0.8 + +# --- SCRIPT UTAMA --- +print(f"Memulai proses split data dari folder '{SOURCE_DIR}'...") + +# Hapus folder target jika sudah ada untuk memulai dari awal +if os.path.exists(TARGET_DIR): + shutil.rmtree(TARGET_DIR) + +# Buat struktur folder target (train dan valid) +train_path = os.path.join(TARGET_DIR, 'train') +valid_path = os.path.join(TARGET_DIR, 'valid') +os.makedirs(train_path, exist_ok=True) +os.makedirs(valid_path, exist_ok=True) + +# Dapatkan daftar semua folder kelas di direktori sumber +try: + class_folders = [f for f in os.listdir(SOURCE_DIR) if os.path.isdir(os.path.join(SOURCE_DIR, f))] + if not class_folders: + raise FileNotFoundError +except FileNotFoundError: + print(f"!!! 
ERROR: Folder '{SOURCE_DIR}' tidak ditemukan atau kosong.") + print("Pastikan Anda sudah mengekstrak dataset Kaggle ke dalam folder tersebut.") + exit() + +print(f"Ditemukan {len(class_folders)} kelas: {class_folders}") + +# Loop melalui setiap folder kelas +for cls in class_folders: + source_class_path = os.path.join(SOURCE_DIR, cls) + + # Buat subfolder kelas di dalam train dan valid + train_class_path = os.path.join(train_path, cls) + valid_class_path = os.path.join(valid_path, cls) + os.makedirs(train_class_path, exist_ok=True) + os.makedirs(valid_class_path, exist_ok=True) + + # Dapatkan semua file gambar untuk kelas ini + images = [f for f in os.listdir(source_class_path) if f.lower().endswith(('.png', '.jpg', '.jpeg'))] + + # Acak urutan gambar + random.shuffle(images) + + # Hitung titik pembagian + split_point = math.floor(len(images) * TRAIN_RATIO) + + # Bagi daftar gambar menjadi train dan valid + train_images = images[:split_point] + valid_images = images[split_point:] + + print(f" Kelas '{cls}': {len(train_images)} train, {len(valid_images)} valid") + + # Salin file-file ke folder tujuan + for img in train_images: + shutil.copy(os.path.join(source_class_path, img), os.path.join(train_class_path, img)) + + for img in valid_images: + shutil.copy(os.path.join(source_class_path, img), os.path.join(valid_class_path, img)) + +print("\n--- Proses split data selesai! ---") +print(f"Folder '{TARGET_DIR}' dengan struktur 'train' dan 'valid' telah berhasil dibuat.") \ No newline at end of file diff --git a/.history/src/split_dataset_20250924192749.py b/.history/src/split_dataset_20250924192749.py new file mode 100644 index 0000000000000000000000000000000000000000..ae42090e1d08fa03823bd6b5d95d070861eb0177 --- /dev/null +++ b/.history/src/split_dataset_20250924192749.py @@ -0,0 +1,74 @@ +# split_data.py + +import os +import shutil +import random +import math + +# --- KONFIGURASI --- +# 1. 
Ganti 'original_dataset' dengan nama folder tempat Anda mengekstrak data Kaggle +SOURCE_DIR = 'data/dataset' +# 2. Ini adalah folder tujuan tempat 'train' dan 'valid' akan dibuat +TARGET_DIR = 'data' +# 3. Rasio pembagian (0.8 berarti 80% untuk training) +TRAIN_RATIO = 0.8 + +# --- SCRIPT UTAMA --- +print(f"Memulai proses split data dari folder '{SOURCE_DIR}'...") + +# Hapus folder target jika sudah ada untuk memulai dari awal +if os.path.exists(TARGET_DIR): + shutil.rmtree(TARGET_DIR) + +# Buat struktur folder target (train dan valid) +train_path = os.path.join(TARGET_DIR, 'train') +valid_path = os.path.join(TARGET_DIR, 'valid') +os.makedirs(train_path, exist_ok=True) +os.makedirs(valid_path, exist_ok=True) + +# Dapatkan daftar semua folder kelas di direktori sumber +try: + class_folders = [f for f in os.listdir(SOURCE_DIR) if os.path.isdir(os.path.join(SOURCE_DIR, f))] + if not class_folders: + raise FileNotFoundError +except FileNotFoundError: + print(f"!!! ERROR: Folder '{SOURCE_DIR}' tidak ditemukan atau kosong.") + print("Pastikan Anda sudah mengekstrak dataset Kaggle ke dalam folder tersebut.") + exit() + +print(f"Ditemukan {len(class_folders)} kelas: {class_folders}") + +# Loop melalui setiap folder kelas +for cls in class_folders: + source_class_path = os.path.join(SOURCE_DIR, cls) + + # Buat subfolder kelas di dalam train dan valid + train_class_path = os.path.join(train_path, cls) + valid_class_path = os.path.join(valid_path, cls) + os.makedirs(train_class_path, exist_ok=True) + os.makedirs(valid_class_path, exist_ok=True) + + # Dapatkan semua file gambar untuk kelas ini + images = [f for f in os.listdir(source_class_path) if f.lower().endswith(('.png', '.jpg', '.jpeg'))] + + # Acak urutan gambar + random.shuffle(images) + + # Hitung titik pembagian + split_point = math.floor(len(images) * TRAIN_RATIO) + + # Bagi daftar gambar menjadi train dan valid + train_images = images[:split_point] + valid_images = images[split_point:] + + print(f" Kelas '{cls}': 
{len(train_images)} train, {len(valid_images)} valid") + + # Salin file-file ke folder tujuan + for img in train_images: + shutil.copy(os.path.join(source_class_path, img), os.path.join(train_class_path, img)) + + for img in valid_images: + shutil.copy(os.path.join(source_class_path, img), os.path.join(valid_class_path, img)) + +print("\n--- Proses split data selesai! ---") +print(f"Folder '{TARGET_DIR}' dengan struktur 'train' dan 'valid' telah berhasil dibuat.") \ No newline at end of file diff --git a/.history/src/split_dataset_20250924193438.py b/.history/src/split_dataset_20250924193438.py new file mode 100644 index 0000000000000000000000000000000000000000..c988a253ff2a0f3b4bee655ee64d9f0fee80f6e3 --- /dev/null +++ b/.history/src/split_dataset_20250924193438.py @@ -0,0 +1,74 @@ +# split_data.py + +import os +import shutil +import random +import math + +# --- KONFIGURASI --- +# 1. Ganti 'original_dataset' dengan nama folder tempat Anda mengekstrak data Kaggle +SOURCE_DIR = 'dataset' +# 2. Ini adalah folder tujuan tempat 'train' dan 'valid' akan dibuat +TARGET_DIR = 'data' +# 3. Rasio pembagian (0.8 berarti 80% untuk training) +TRAIN_RATIO = 0.8 + +# --- SCRIPT UTAMA --- +print(f"Memulai proses split data dari folder '{SOURCE_DIR}'...") + +# Hapus folder target jika sudah ada untuk memulai dari awal +if os.path.exists(TARGET_DIR): + shutil.rmtree(TARGET_DIR) + +# Buat struktur folder target (train dan valid) +train_path = os.path.join(TARGET_DIR, 'train') +valid_path = os.path.join(TARGET_DIR, 'valid') +os.makedirs(train_path, exist_ok=True) +os.makedirs(valid_path, exist_ok=True) + +# Dapatkan daftar semua folder kelas di direktori sumber +try: + class_folders = [f for f in os.listdir(SOURCE_DIR) if os.path.isdir(os.path.join(SOURCE_DIR, f))] + if not class_folders: + raise FileNotFoundError +except FileNotFoundError: + print(f"!!! 
ERROR: Folder '{SOURCE_DIR}' tidak ditemukan atau kosong.") + print("Pastikan Anda sudah mengekstrak dataset Kaggle ke dalam folder tersebut.") + exit() + +print(f"Ditemukan {len(class_folders)} kelas: {class_folders}") + +# Loop melalui setiap folder kelas +for cls in class_folders: + source_class_path = os.path.join(SOURCE_DIR, cls) + + # Buat subfolder kelas di dalam train dan valid + train_class_path = os.path.join(train_path, cls) + valid_class_path = os.path.join(valid_path, cls) + os.makedirs(train_class_path, exist_ok=True) + os.makedirs(valid_class_path, exist_ok=True) + + # Dapatkan semua file gambar untuk kelas ini + images = [f for f in os.listdir(source_class_path) if f.lower().endswith(('.png', '.jpg', '.jpeg'))] + + # Acak urutan gambar + random.shuffle(images) + + # Hitung titik pembagian + split_point = math.floor(len(images) * TRAIN_RATIO) + + # Bagi daftar gambar menjadi train dan valid + train_images = images[:split_point] + valid_images = images[split_point:] + + print(f" Kelas '{cls}': {len(train_images)} train, {len(valid_images)} valid") + + # Salin file-file ke folder tujuan + for img in train_images: + shutil.copy(os.path.join(source_class_path, img), os.path.join(train_class_path, img)) + + for img in valid_images: + shutil.copy(os.path.join(source_class_path, img), os.path.join(valid_class_path, img)) + +print("\n--- Proses split data selesai! 
---") +print(f"Folder '{TARGET_DIR}' dengan struktur 'train' dan 'valid' telah berhasil dibuat.") \ No newline at end of file diff --git a/.history/src/train_20250923122231.py b/.history/src/train_20250923122231.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/.history/src/train_20250923122723.py b/.history/src/train_20250923122723.py new file mode 100644 index 0000000000000000000000000000000000000000..46ebbfef1b79f27e2a4e114c070990894952c05b --- /dev/null +++ b/.history/src/train_20250923122723.py @@ -0,0 +1,11 @@ +# src/train.py +from src import CustomDataset, get_vit_model + +if __name__ == "__main__": + # Coba inisialisasi dataset + dataset = CustomDataset("data/") + print("Dataset length:", len(dataset)) + + # Coba load model + model = get_vit_model(num_classes=3) + print(model) diff --git a/.history/src/train_20250923140153.py b/.history/src/train_20250923140153.py new file mode 100644 index 0000000000000000000000000000000000000000..4d47e012b5e93274c7bb98ea372fd762d0a2e53d --- /dev/null +++ b/.history/src/train_20250923140153.py @@ -0,0 +1,186 @@ +# src/train.py + +import torch +import torch.nn as nn +import torch.optim as optim +from tqdm import tqdm +import numpy as np +from sklearn.metrics import classification_report + +# Impor dari file lain dalam proyek +from model import create_model +from dataset import get_dataloaders +from utils import save_model, save_plots + +# --- 1. 
KONFIGURASI & HYPERPARAMETERS --- +# Sebagian besar diambil dari rekomendasi metodologi Anda +DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu' +DATA_DIR = '../data' # Pastikan folder 'data' hasil prepare_dataset.py ada di sini +OUTPUT_DIR = '../outputs' +IMAGE_SIZE = 224 # Ukuran input untuk ViT (bisa 224x224 atau 384x384) +BATCH_SIZE = 32 +EPOCHS = 50 # Total epoch, tapi kita akan pakai Early Stopping +LEARNING_RATE_HEAD = 1e-3 # LR untuk melatih classifier head saja +LEARNING_RATE_FINETUNE = 3e-5 # LR untuk fine-tuning seluruh model +WEIGHT_DECAY = 0.01 +MODEL_NAME = 'vit_mae_cataract_classifier.pth' +NUM_CLASSES = 4 # Sesuaikan dengan jumlah kelas katarak Anda + +# --- 2. FUNGSI UNTUK MENGATASI CLASS IMBALANCE --- +def calculate_class_weights(dataset): + """ + Menghitung bobot untuk setiap kelas berdasarkan frekuensinya (1 / sqrt(freq)). + Ini adalah implementasi dari "Weighted loss" di metodologi Anda. + """ + # Hitung jumlah sampel per kelas + class_counts = np.bincount(dataset.targets) + # Hitung bobot: 1 / sqrt(frekuensi) + class_weights = 1. / np.sqrt(class_counts) + # Normalisasi bobot + class_weights = class_weights / np.sum(class_weights) + return torch.tensor(class_weights, dtype=torch.float).to(DEVICE) + +# --- 3. 
FUNGSI TRAINING & VALIDASI --- +def train_one_epoch(model, dataloader, optimizer, criterion, device): + model.train() + running_loss = 0.0 + correct_predictions = 0 + total_samples = 0 + + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Training"): + images, labels = images.to(device), labels.to(device) + + optimizer.zero_grad() + outputs = model(images) + loss = criterion(outputs, labels) + + loss.backward() + optimizer.step() + + running_loss += loss.item() * images.size(0) + _, preds = torch.max(outputs, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + return epoch_loss, epoch_acc + +def validate_one_epoch(model, dataloader, criterion, device): + model.eval() + running_loss = 0.0 + correct_predictions = 0 + total_samples = 0 + + all_preds = [] + all_labels = [] + + with torch.no_grad(): + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Validating"): + images, labels = images.to(device), labels.to(device) + + outputs = model(images) + loss = criterion(outputs, labels) + + running_loss += loss.item() * images.size(0) + _, preds = torch.max(outputs, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + all_preds.extend(preds.cpu().numpy()) + all_labels.extend(labels.cpu().numpy()) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + + # Cetak laporan klasifikasi (precision, recall, f1-score per kelas) + print("\n--- Laporan Klasifikasi Validasi ---") + print(classification_report(all_labels, all_preds, target_names=[str(i) for i in range(NUM_CLASSES)])) + + return epoch_loss, epoch_acc + +# --- 4. SCRIPT UTAMA --- +if __name__ == '__main__': + # A. 
Persiapan & Split Data + # Kita asumsikan ini sudah dilakukan oleh prepare_dataset.py + # get_dataloaders akan mengambil data dari folder 'data/train' dan 'data/valid' + train_loader, valid_loader, classes = get_dataloaders(DATA_DIR, BATCH_SIZE, IMAGE_SIZE) + + # B. Arsitektur & Pretraining + # Menggunakan ViT pretrained (MAE) dari model.py + model = create_model(num_classes=NUM_CLASSES).to(DEVICE) + + # C. Menangani Imbalance + # Hitung bobot kelas dari dataset training + class_weights = calculate_class_weights(train_loader.dataset) + print(f"Class weights untuk mengatasi imbalance: {class_weights}") + # Gunakan Weighted Cross-Entropy Loss + criterion = nn.CrossEntropyLoss(weight=class_weights) + + # D. Strategi Fine-tune + # --- TAHAP 1: FREEZE BACKBONE, LATIH HEAD --- + print("\n--- TAHAP 1: Melatih Classifier Head ---") + # Bekukan semua layer kecuali head + for param in model.parameters(): + param.requires_grad = False + for param in model.head.parameters(): + param.requires_grad = True + + optimizer_head = optim.AdamW(model.head.parameters(), lr=LEARNING_RATE_HEAD, weight_decay=WEIGHT_DECAY) + + # Latih head selama beberapa epoch + for epoch in range(5): # Misal 5 epoch untuk head + print(f"Epoch Head {epoch+1}/5") + train_one_epoch(model, train_loader, optimizer_head, criterion, DEVICE) + validate_one_epoch(model, valid_loader, criterion, DEVICE) + + # --- TAHAP 2: UNFREEZE & FINE-TUNE SELURUH MODEL --- + print("\n--- TAHAP 2: Fine-tuning Seluruh Model ---") + # Cairkan semua layer + for param in model.parameters(): + param.requires_grad = True + + optimizer_finetune = optim.AdamW(model.parameters(), lr=LEARNING_RATE_FINETUNE, weight_decay=WEIGHT_DECAY) + # Gunakan scheduler untuk menyesuaikan learning rate + scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer_finetune, T_max=EPOCHS, eta_min=1e-7) + + # E. 
Loop Training Utama dengan Regularisasi & Hyperparams + history = { + 'train_loss': [], 'train_acc': [], + 'valid_loss': [], 'valid_acc': [] + } + best_valid_acc = 0.0 + + for epoch in range(EPOCHS): + print(f"Epoch {epoch+1}/{EPOCHS}") + + train_loss, train_acc = train_one_epoch(model, train_loader, optimizer_finetune, criterion, DEVICE) + valid_loss, valid_acc = validate_one_epoch(model, valid_loader, criterion, DEVICE) + + scheduler.step() # Update learning rate + + print(f" Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}") + print(f" Valid Loss: {valid_loss:.4f}, Valid Acc: {valid_acc:.4f}") + + # Simpan riwayat + history['train_loss'].append(train_loss) + history['train_acc'].append(train_acc) + history['valid_loss'].append(valid_loss) + history['valid_acc'].append(valid_acc) + + # Simpan model dengan akurasi validasi terbaik (Early stopping sederhana) + if valid_acc > best_valid_acc: + print(f"Validasi akurasi meningkat dari {best_valid_acc:.4f} ke {valid_acc:.4f}. Menyimpan model...") + save_model(epoch, model, optimizer_finetune, criterion, f"{OUTPUT_DIR}/{MODEL_NAME}") + best_valid_acc = valid_acc + + # F. 
Evaluasi & Reporting + # Simpan plot akurasi dan loss + save_plots( + history['train_acc'], history['valid_acc'], + history['train_loss'], history['valid_loss'], + OUTPUT_DIR + ) + + print("\n--- Selesai ---") + print(f"Model terbaik disimpan di {OUTPUT_DIR}/{MODEL_NAME}") \ No newline at end of file diff --git a/.history/src/train_20250923140307.py b/.history/src/train_20250923140307.py new file mode 100644 index 0000000000000000000000000000000000000000..30b16d558a6556fd2d4219a6afbac44e14479383 --- /dev/null +++ b/.history/src/train_20250923140307.py @@ -0,0 +1,186 @@ +# src/train.py + +import torch +import torch.nn as nn +import torch.optim as optim +from tqdm import tqdm +import numpy as np +from sklearn.metrics import classification_report + +# Impor dari file lain dalam proyek +from model import create_model +from dataset import get_dataloaders +from utils import save_model, save_plots + +# --- 1. KONFIGURASI & HYPERPARAMETERS --- +# Sebagian besar diambil dari rekomendasi metodologi Anda +DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu' +DATA_DIR = '../data/split data' # Pastikan folder 'data' hasil prepare_dataset.py ada di sini +OUTPUT_DIR = '../outputs' +IMAGE_SIZE = 224 # Ukuran input untuk ViT (bisa 224x224 atau 384x384) +BATCH_SIZE = 32 +EPOCHS = 50 # Total epoch, tapi kita akan pakai Early Stopping +LEARNING_RATE_HEAD = 1e-3 # LR untuk melatih classifier head saja +LEARNING_RATE_FINETUNE = 3e-5 # LR untuk fine-tuning seluruh model +WEIGHT_DECAY = 0.01 +MODEL_NAME = 'vit_mae_cataract_classifier.pth' +NUM_CLASSES = 4 # Sesuaikan dengan jumlah kelas katarak Anda + +# --- 2. FUNGSI UNTUK MENGATASI CLASS IMBALANCE --- +def calculate_class_weights(dataset): + """ + Menghitung bobot untuk setiap kelas berdasarkan frekuensinya (1 / sqrt(freq)). + Ini adalah implementasi dari "Weighted loss" di metodologi Anda. + """ + # Hitung jumlah sampel per kelas + class_counts = np.bincount(dataset.targets) + # Hitung bobot: 1 / sqrt(frekuensi) + class_weights = 1. 
/ np.sqrt(class_counts) + # Normalisasi bobot + class_weights = class_weights / np.sum(class_weights) + return torch.tensor(class_weights, dtype=torch.float).to(DEVICE) + +# --- 3. FUNGSI TRAINING & VALIDASI --- +def train_one_epoch(model, dataloader, optimizer, criterion, device): + model.train() + running_loss = 0.0 + correct_predictions = 0 + total_samples = 0 + + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Training"): + images, labels = images.to(device), labels.to(device) + + optimizer.zero_grad() + outputs = model(images) + loss = criterion(outputs, labels) + + loss.backward() + optimizer.step() + + running_loss += loss.item() * images.size(0) + _, preds = torch.max(outputs, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + return epoch_loss, epoch_acc + +def validate_one_epoch(model, dataloader, criterion, device): + model.eval() + running_loss = 0.0 + correct_predictions = 0 + total_samples = 0 + + all_preds = [] + all_labels = [] + + with torch.no_grad(): + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Validating"): + images, labels = images.to(device), labels.to(device) + + outputs = model(images) + loss = criterion(outputs, labels) + + running_loss += loss.item() * images.size(0) + _, preds = torch.max(outputs, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + all_preds.extend(preds.cpu().numpy()) + all_labels.extend(labels.cpu().numpy()) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + + # Cetak laporan klasifikasi (precision, recall, f1-score per kelas) + print("\n--- Laporan Klasifikasi Validasi ---") + print(classification_report(all_labels, all_preds, target_names=[str(i) for i in range(NUM_CLASSES)])) + + return epoch_loss, epoch_acc 
+ +# --- 4. SCRIPT UTAMA --- +if __name__ == '__main__': + # A. Persiapan & Split Data + # Kita asumsikan ini sudah dilakukan oleh prepare_dataset.py + # get_dataloaders akan mengambil data dari folder 'data/train' dan 'data/valid' + train_loader, valid_loader, classes = get_dataloaders(DATA_DIR, BATCH_SIZE, IMAGE_SIZE) + + # B. Arsitektur & Pretraining + # Menggunakan ViT pretrained (MAE) dari model.py + model = create_model(num_classes=NUM_CLASSES).to(DEVICE) + + # C. Menangani Imbalance + # Hitung bobot kelas dari dataset training + class_weights = calculate_class_weights(train_loader.dataset) + print(f"Class weights untuk mengatasi imbalance: {class_weights}") + # Gunakan Weighted Cross-Entropy Loss + criterion = nn.CrossEntropyLoss(weight=class_weights) + + # D. Strategi Fine-tune + # --- TAHAP 1: FREEZE BACKBONE, LATIH HEAD --- + print("\n--- TAHAP 1: Melatih Classifier Head ---") + # Bekukan semua layer kecuali head + for param in model.parameters(): + param.requires_grad = False + for param in model.head.parameters(): + param.requires_grad = True + + optimizer_head = optim.AdamW(model.head.parameters(), lr=LEARNING_RATE_HEAD, weight_decay=WEIGHT_DECAY) + + # Latih head selama beberapa epoch + for epoch in range(5): # Misal 5 epoch untuk head + print(f"Epoch Head {epoch+1}/5") + train_one_epoch(model, train_loader, optimizer_head, criterion, DEVICE) + validate_one_epoch(model, valid_loader, criterion, DEVICE) + + # --- TAHAP 2: UNFREEZE & FINE-TUNE SELURUH MODEL --- + print("\n--- TAHAP 2: Fine-tuning Seluruh Model ---") + # Cairkan semua layer + for param in model.parameters(): + param.requires_grad = True + + optimizer_finetune = optim.AdamW(model.parameters(), lr=LEARNING_RATE_FINETUNE, weight_decay=WEIGHT_DECAY) + # Gunakan scheduler untuk menyesuaikan learning rate + scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer_finetune, T_max=EPOCHS, eta_min=1e-7) + + # E. 
Loop Training Utama dengan Regularisasi & Hyperparams + history = { + 'train_loss': [], 'train_acc': [], + 'valid_loss': [], 'valid_acc': [] + } + best_valid_acc = 0.0 + + for epoch in range(EPOCHS): + print(f"Epoch {epoch+1}/{EPOCHS}") + + train_loss, train_acc = train_one_epoch(model, train_loader, optimizer_finetune, criterion, DEVICE) + valid_loss, valid_acc = validate_one_epoch(model, valid_loader, criterion, DEVICE) + + scheduler.step() # Update learning rate + + print(f" Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}") + print(f" Valid Loss: {valid_loss:.4f}, Valid Acc: {valid_acc:.4f}") + + # Simpan riwayat + history['train_loss'].append(train_loss) + history['train_acc'].append(train_acc) + history['valid_loss'].append(valid_loss) + history['valid_acc'].append(valid_acc) + + # Simpan model dengan akurasi validasi terbaik (Early stopping sederhana) + if valid_acc > best_valid_acc: + print(f"Validasi akurasi meningkat dari {best_valid_acc:.4f} ke {valid_acc:.4f}. Menyimpan model...") + save_model(epoch, model, optimizer_finetune, criterion, f"{OUTPUT_DIR}/{MODEL_NAME}") + best_valid_acc = valid_acc + + # F. 
Evaluasi & Reporting + # Simpan plot akurasi dan loss + save_plots( + history['train_acc'], history['valid_acc'], + history['train_loss'], history['valid_loss'], + OUTPUT_DIR + ) + + print("\n--- Selesai ---") + print(f"Model terbaik disimpan di {OUTPUT_DIR}/{MODEL_NAME}") \ No newline at end of file diff --git a/.history/src/train_20250923140310.py b/.history/src/train_20250923140310.py new file mode 100644 index 0000000000000000000000000000000000000000..30a44df8a57ecf488872c69dff9724131ab32184 --- /dev/null +++ b/.history/src/train_20250923140310.py @@ -0,0 +1,186 @@ +# src/train.py + +import torch +import torch.nn as nn +import torch.optim as optim +from tqdm import tqdm +import numpy as np +from sklearn.metrics import classification_report + +# Impor dari file lain dalam proyek +from model import create_model +from dataset import get_dataloaders +from utils import save_model, save_plots + +# --- 1. KONFIGURASI & HYPERPARAMETERS --- +# Sebagian besar diambil dari rekomendasi metodologi Anda +DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu' +DATA_DIR = '/data/split data' # Pastikan folder 'data' hasil prepare_dataset.py ada di sini +OUTPUT_DIR = '../outputs' +IMAGE_SIZE = 224 # Ukuran input untuk ViT (bisa 224x224 atau 384x384) +BATCH_SIZE = 32 +EPOCHS = 50 # Total epoch, tapi kita akan pakai Early Stopping +LEARNING_RATE_HEAD = 1e-3 # LR untuk melatih classifier head saja +LEARNING_RATE_FINETUNE = 3e-5 # LR untuk fine-tuning seluruh model +WEIGHT_DECAY = 0.01 +MODEL_NAME = 'vit_mae_cataract_classifier.pth' +NUM_CLASSES = 4 # Sesuaikan dengan jumlah kelas katarak Anda + +# --- 2. FUNGSI UNTUK MENGATASI CLASS IMBALANCE --- +def calculate_class_weights(dataset): + """ + Menghitung bobot untuk setiap kelas berdasarkan frekuensinya (1 / sqrt(freq)). + Ini adalah implementasi dari "Weighted loss" di metodologi Anda. + """ + # Hitung jumlah sampel per kelas + class_counts = np.bincount(dataset.targets) + # Hitung bobot: 1 / sqrt(frekuensi) + class_weights = 1. 
/ np.sqrt(class_counts) + # Normalisasi bobot + class_weights = class_weights / np.sum(class_weights) + return torch.tensor(class_weights, dtype=torch.float).to(DEVICE) + +# --- 3. FUNGSI TRAINING & VALIDASI --- +def train_one_epoch(model, dataloader, optimizer, criterion, device): + model.train() + running_loss = 0.0 + correct_predictions = 0 + total_samples = 0 + + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Training"): + images, labels = images.to(device), labels.to(device) + + optimizer.zero_grad() + outputs = model(images) + loss = criterion(outputs, labels) + + loss.backward() + optimizer.step() + + running_loss += loss.item() * images.size(0) + _, preds = torch.max(outputs, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + return epoch_loss, epoch_acc + +def validate_one_epoch(model, dataloader, criterion, device): + model.eval() + running_loss = 0.0 + correct_predictions = 0 + total_samples = 0 + + all_preds = [] + all_labels = [] + + with torch.no_grad(): + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Validating"): + images, labels = images.to(device), labels.to(device) + + outputs = model(images) + loss = criterion(outputs, labels) + + running_loss += loss.item() * images.size(0) + _, preds = torch.max(outputs, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + all_preds.extend(preds.cpu().numpy()) + all_labels.extend(labels.cpu().numpy()) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + + # Cetak laporan klasifikasi (precision, recall, f1-score per kelas) + print("\n--- Laporan Klasifikasi Validasi ---") + print(classification_report(all_labels, all_preds, target_names=[str(i) for i in range(NUM_CLASSES)])) + + return epoch_loss, epoch_acc 
+ +# --- 4. SCRIPT UTAMA --- +if __name__ == '__main__': + # A. Persiapan & Split Data + # Kita asumsikan ini sudah dilakukan oleh prepare_dataset.py + # get_dataloaders akan mengambil data dari folder 'data/train' dan 'data/valid' + train_loader, valid_loader, classes = get_dataloaders(DATA_DIR, BATCH_SIZE, IMAGE_SIZE) + + # B. Arsitektur & Pretraining + # Menggunakan ViT pretrained (MAE) dari model.py + model = create_model(num_classes=NUM_CLASSES).to(DEVICE) + + # C. Menangani Imbalance + # Hitung bobot kelas dari dataset training + class_weights = calculate_class_weights(train_loader.dataset) + print(f"Class weights untuk mengatasi imbalance: {class_weights}") + # Gunakan Weighted Cross-Entropy Loss + criterion = nn.CrossEntropyLoss(weight=class_weights) + + # D. Strategi Fine-tune + # --- TAHAP 1: FREEZE BACKBONE, LATIH HEAD --- + print("\n--- TAHAP 1: Melatih Classifier Head ---") + # Bekukan semua layer kecuali head + for param in model.parameters(): + param.requires_grad = False + for param in model.head.parameters(): + param.requires_grad = True + + optimizer_head = optim.AdamW(model.head.parameters(), lr=LEARNING_RATE_HEAD, weight_decay=WEIGHT_DECAY) + + # Latih head selama beberapa epoch + for epoch in range(5): # Misal 5 epoch untuk head + print(f"Epoch Head {epoch+1}/5") + train_one_epoch(model, train_loader, optimizer_head, criterion, DEVICE) + validate_one_epoch(model, valid_loader, criterion, DEVICE) + + # --- TAHAP 2: UNFREEZE & FINE-TUNE SELURUH MODEL --- + print("\n--- TAHAP 2: Fine-tuning Seluruh Model ---") + # Cairkan semua layer + for param in model.parameters(): + param.requires_grad = True + + optimizer_finetune = optim.AdamW(model.parameters(), lr=LEARNING_RATE_FINETUNE, weight_decay=WEIGHT_DECAY) + # Gunakan scheduler untuk menyesuaikan learning rate + scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer_finetune, T_max=EPOCHS, eta_min=1e-7) + + # E. 
Loop Training Utama dengan Regularisasi & Hyperparams + history = { + 'train_loss': [], 'train_acc': [], + 'valid_loss': [], 'valid_acc': [] + } + best_valid_acc = 0.0 + + for epoch in range(EPOCHS): + print(f"Epoch {epoch+1}/{EPOCHS}") + + train_loss, train_acc = train_one_epoch(model, train_loader, optimizer_finetune, criterion, DEVICE) + valid_loss, valid_acc = validate_one_epoch(model, valid_loader, criterion, DEVICE) + + scheduler.step() # Update learning rate + + print(f" Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}") + print(f" Valid Loss: {valid_loss:.4f}, Valid Acc: {valid_acc:.4f}") + + # Simpan riwayat + history['train_loss'].append(train_loss) + history['train_acc'].append(train_acc) + history['valid_loss'].append(valid_loss) + history['valid_acc'].append(valid_acc) + + # Simpan model dengan akurasi validasi terbaik (Early stopping sederhana) + if valid_acc > best_valid_acc: + print(f"Validasi akurasi meningkat dari {best_valid_acc:.4f} ke {valid_acc:.4f}. Menyimpan model...") + save_model(epoch, model, optimizer_finetune, criterion, f"{OUTPUT_DIR}/{MODEL_NAME}") + best_valid_acc = valid_acc + + # F. 
Evaluasi & Reporting + # Simpan plot akurasi dan loss + save_plots( + history['train_acc'], history['valid_acc'], + history['train_loss'], history['valid_loss'], + OUTPUT_DIR + ) + + print("\n--- Selesai ---") + print(f"Model terbaik disimpan di {OUTPUT_DIR}/{MODEL_NAME}") \ No newline at end of file diff --git a/.history/src/train_20250923140317.py b/.history/src/train_20250923140317.py new file mode 100644 index 0000000000000000000000000000000000000000..11cc88ae611608c283ca6a025634e050dbf1250c --- /dev/null +++ b/.history/src/train_20250923140317.py @@ -0,0 +1,186 @@ +# src/train.py + +import torch +import torch.nn as nn +import torch.optim as optim +from tqdm import tqdm +import numpy as np +from sklearn.metrics import classification_report + +# Impor dari file lain dalam proyek +from model import create_model +from dataset import get_dataloaders +from utils import save_model, save_plots + +# --- 1. KONFIGURASI & HYPERPARAMETERS --- +# Sebagian besar diambil dari rekomendasi metodologi Anda +DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu' +DATA_DIR = '/data/split data' # Pastikan folder 'data' hasil prepare_dataset.py ada di sini +OUTPUT_DIR = '/outputs' +IMAGE_SIZE = 224 # Ukuran input untuk ViT (bisa 224x224 atau 384x384) +BATCH_SIZE = 32 +EPOCHS = 50 # Total epoch, tapi kita akan pakai Early Stopping +LEARNING_RATE_HEAD = 1e-3 # LR untuk melatih classifier head saja +LEARNING_RATE_FINETUNE = 3e-5 # LR untuk fine-tuning seluruh model +WEIGHT_DECAY = 0.01 +MODEL_NAME = 'vit_mae_cataract_classifier.pth' +NUM_CLASSES = 4 # Sesuaikan dengan jumlah kelas katarak Anda + +# --- 2. FUNGSI UNTUK MENGATASI CLASS IMBALANCE --- +def calculate_class_weights(dataset): + """ + Menghitung bobot untuk setiap kelas berdasarkan frekuensinya (1 / sqrt(freq)). + Ini adalah implementasi dari "Weighted loss" di metodologi Anda. + """ + # Hitung jumlah sampel per kelas + class_counts = np.bincount(dataset.targets) + # Hitung bobot: 1 / sqrt(frekuensi) + class_weights = 1. 
/ np.sqrt(class_counts) + # Normalisasi bobot + class_weights = class_weights / np.sum(class_weights) + return torch.tensor(class_weights, dtype=torch.float).to(DEVICE) + +# --- 3. FUNGSI TRAINING & VALIDASI --- +def train_one_epoch(model, dataloader, optimizer, criterion, device): + model.train() + running_loss = 0.0 + correct_predictions = 0 + total_samples = 0 + + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Training"): + images, labels = images.to(device), labels.to(device) + + optimizer.zero_grad() + outputs = model(images) + loss = criterion(outputs, labels) + + loss.backward() + optimizer.step() + + running_loss += loss.item() * images.size(0) + _, preds = torch.max(outputs, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + return epoch_loss, epoch_acc + +def validate_one_epoch(model, dataloader, criterion, device): + model.eval() + running_loss = 0.0 + correct_predictions = 0 + total_samples = 0 + + all_preds = [] + all_labels = [] + + with torch.no_grad(): + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Validating"): + images, labels = images.to(device), labels.to(device) + + outputs = model(images) + loss = criterion(outputs, labels) + + running_loss += loss.item() * images.size(0) + _, preds = torch.max(outputs, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + all_preds.extend(preds.cpu().numpy()) + all_labels.extend(labels.cpu().numpy()) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + + # Cetak laporan klasifikasi (precision, recall, f1-score per kelas) + print("\n--- Laporan Klasifikasi Validasi ---") + print(classification_report(all_labels, all_preds, target_names=[str(i) for i in range(NUM_CLASSES)])) + + return epoch_loss, epoch_acc 
+ +# --- 4. SCRIPT UTAMA --- +if __name__ == '__main__': + # A. Persiapan & Split Data + # Kita asumsikan ini sudah dilakukan oleh prepare_dataset.py + # get_dataloaders akan mengambil data dari folder 'data/train' dan 'data/valid' + train_loader, valid_loader, classes = get_dataloaders(DATA_DIR, BATCH_SIZE, IMAGE_SIZE) + + # B. Arsitektur & Pretraining + # Menggunakan ViT pretrained (MAE) dari model.py + model = create_model(num_classes=NUM_CLASSES).to(DEVICE) + + # C. Menangani Imbalance + # Hitung bobot kelas dari dataset training + class_weights = calculate_class_weights(train_loader.dataset) + print(f"Class weights untuk mengatasi imbalance: {class_weights}") + # Gunakan Weighted Cross-Entropy Loss + criterion = nn.CrossEntropyLoss(weight=class_weights) + + # D. Strategi Fine-tune + # --- TAHAP 1: FREEZE BACKBONE, LATIH HEAD --- + print("\n--- TAHAP 1: Melatih Classifier Head ---") + # Bekukan semua layer kecuali head + for param in model.parameters(): + param.requires_grad = False + for param in model.head.parameters(): + param.requires_grad = True + + optimizer_head = optim.AdamW(model.head.parameters(), lr=LEARNING_RATE_HEAD, weight_decay=WEIGHT_DECAY) + + # Latih head selama beberapa epoch + for epoch in range(5): # Misal 5 epoch untuk head + print(f"Epoch Head {epoch+1}/5") + train_one_epoch(model, train_loader, optimizer_head, criterion, DEVICE) + validate_one_epoch(model, valid_loader, criterion, DEVICE) + + # --- TAHAP 2: UNFREEZE & FINE-TUNE SELURUH MODEL --- + print("\n--- TAHAP 2: Fine-tuning Seluruh Model ---") + # Cairkan semua layer + for param in model.parameters(): + param.requires_grad = True + + optimizer_finetune = optim.AdamW(model.parameters(), lr=LEARNING_RATE_FINETUNE, weight_decay=WEIGHT_DECAY) + # Gunakan scheduler untuk menyesuaikan learning rate + scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer_finetune, T_max=EPOCHS, eta_min=1e-7) + + # E. 
Loop Training Utama dengan Regularisasi & Hyperparams + history = { + 'train_loss': [], 'train_acc': [], + 'valid_loss': [], 'valid_acc': [] + } + best_valid_acc = 0.0 + + for epoch in range(EPOCHS): + print(f"Epoch {epoch+1}/{EPOCHS}") + + train_loss, train_acc = train_one_epoch(model, train_loader, optimizer_finetune, criterion, DEVICE) + valid_loss, valid_acc = validate_one_epoch(model, valid_loader, criterion, DEVICE) + + scheduler.step() # Update learning rate + + print(f" Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}") + print(f" Valid Loss: {valid_loss:.4f}, Valid Acc: {valid_acc:.4f}") + + # Simpan riwayat + history['train_loss'].append(train_loss) + history['train_acc'].append(train_acc) + history['valid_loss'].append(valid_loss) + history['valid_acc'].append(valid_acc) + + # Simpan model dengan akurasi validasi terbaik (Early stopping sederhana) + if valid_acc > best_valid_acc: + print(f"Validasi akurasi meningkat dari {best_valid_acc:.4f} ke {valid_acc:.4f}. Menyimpan model...") + save_model(epoch, model, optimizer_finetune, criterion, f"{OUTPUT_DIR}/{MODEL_NAME}") + best_valid_acc = valid_acc + + # F. 
Evaluasi & Reporting + # Simpan plot akurasi dan loss + save_plots( + history['train_acc'], history['valid_acc'], + history['train_loss'], history['valid_loss'], + OUTPUT_DIR + ) + + print("\n--- Selesai ---") + print(f"Model terbaik disimpan di {OUTPUT_DIR}/{MODEL_NAME}") \ No newline at end of file diff --git a/.history/src/train_20250923140321.py b/.history/src/train_20250923140321.py new file mode 100644 index 0000000000000000000000000000000000000000..b0adf5aa4594a9aa7cf45de85b9871fb7da6eb60 --- /dev/null +++ b/.history/src/train_20250923140321.py @@ -0,0 +1,186 @@ +# src/train.py + +import torch +import torch.nn as nn +import torch.optim as optim +from tqdm import tqdm +import numpy as np +from sklearn.metrics import classification_report + +# Impor dari file lain dalam proyek +from model import create_model +from dataset import get_dataloaders +from utils import save_model, save_plots + +# --- 1. KONFIGURASI & HYPERPARAMETERS --- +# Sebagian besar diambil dari rekomendasi metodologi Anda +DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu' +DATA_DIR = '/data/split data' # Pastikan folder 'data' hasil prepare_dataset.py ada di sini +OUTPUT_DIR = '/outputs/' +IMAGE_SIZE = 224 # Ukuran input untuk ViT (bisa 224x224 atau 384x384) +BATCH_SIZE = 32 +EPOCHS = 50 # Total epoch, tapi kita akan pakai Early Stopping +LEARNING_RATE_HEAD = 1e-3 # LR untuk melatih classifier head saja +LEARNING_RATE_FINETUNE = 3e-5 # LR untuk fine-tuning seluruh model +WEIGHT_DECAY = 0.01 +MODEL_NAME = 'vit_mae_cataract_classifier.pth' +NUM_CLASSES = 4 # Sesuaikan dengan jumlah kelas katarak Anda + +# --- 2. FUNGSI UNTUK MENGATASI CLASS IMBALANCE --- +def calculate_class_weights(dataset): + """ + Menghitung bobot untuk setiap kelas berdasarkan frekuensinya (1 / sqrt(freq)). + Ini adalah implementasi dari "Weighted loss" di metodologi Anda. + """ + # Hitung jumlah sampel per kelas + class_counts = np.bincount(dataset.targets) + # Hitung bobot: 1 / sqrt(frekuensi) + class_weights = 1. 
/ np.sqrt(class_counts) + # Normalisasi bobot + class_weights = class_weights / np.sum(class_weights) + return torch.tensor(class_weights, dtype=torch.float).to(DEVICE) + +# --- 3. FUNGSI TRAINING & VALIDASI --- +def train_one_epoch(model, dataloader, optimizer, criterion, device): + model.train() + running_loss = 0.0 + correct_predictions = 0 + total_samples = 0 + + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Training"): + images, labels = images.to(device), labels.to(device) + + optimizer.zero_grad() + outputs = model(images) + loss = criterion(outputs, labels) + + loss.backward() + optimizer.step() + + running_loss += loss.item() * images.size(0) + _, preds = torch.max(outputs, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + return epoch_loss, epoch_acc + +def validate_one_epoch(model, dataloader, criterion, device): + model.eval() + running_loss = 0.0 + correct_predictions = 0 + total_samples = 0 + + all_preds = [] + all_labels = [] + + with torch.no_grad(): + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Validating"): + images, labels = images.to(device), labels.to(device) + + outputs = model(images) + loss = criterion(outputs, labels) + + running_loss += loss.item() * images.size(0) + _, preds = torch.max(outputs, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + all_preds.extend(preds.cpu().numpy()) + all_labels.extend(labels.cpu().numpy()) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + + # Cetak laporan klasifikasi (precision, recall, f1-score per kelas) + print("\n--- Laporan Klasifikasi Validasi ---") + print(classification_report(all_labels, all_preds, target_names=[str(i) for i in range(NUM_CLASSES)])) + + return epoch_loss, epoch_acc 
+ +# --- 4. SCRIPT UTAMA --- +if __name__ == '__main__': + # A. Persiapan & Split Data + # Kita asumsikan ini sudah dilakukan oleh prepare_dataset.py + # get_dataloaders akan mengambil data dari folder 'data/train' dan 'data/valid' + train_loader, valid_loader, classes = get_dataloaders(DATA_DIR, BATCH_SIZE, IMAGE_SIZE) + + # B. Arsitektur & Pretraining + # Menggunakan ViT pretrained (MAE) dari model.py + model = create_model(num_classes=NUM_CLASSES).to(DEVICE) + + # C. Menangani Imbalance + # Hitung bobot kelas dari dataset training + class_weights = calculate_class_weights(train_loader.dataset) + print(f"Class weights untuk mengatasi imbalance: {class_weights}") + # Gunakan Weighted Cross-Entropy Loss + criterion = nn.CrossEntropyLoss(weight=class_weights) + + # D. Strategi Fine-tune + # --- TAHAP 1: FREEZE BACKBONE, LATIH HEAD --- + print("\n--- TAHAP 1: Melatih Classifier Head ---") + # Bekukan semua layer kecuali head + for param in model.parameters(): + param.requires_grad = False + for param in model.head.parameters(): + param.requires_grad = True + + optimizer_head = optim.AdamW(model.head.parameters(), lr=LEARNING_RATE_HEAD, weight_decay=WEIGHT_DECAY) + + # Latih head selama beberapa epoch + for epoch in range(5): # Misal 5 epoch untuk head + print(f"Epoch Head {epoch+1}/5") + train_one_epoch(model, train_loader, optimizer_head, criterion, DEVICE) + validate_one_epoch(model, valid_loader, criterion, DEVICE) + + # --- TAHAP 2: UNFREEZE & FINE-TUNE SELURUH MODEL --- + print("\n--- TAHAP 2: Fine-tuning Seluruh Model ---") + # Cairkan semua layer + for param in model.parameters(): + param.requires_grad = True + + optimizer_finetune = optim.AdamW(model.parameters(), lr=LEARNING_RATE_FINETUNE, weight_decay=WEIGHT_DECAY) + # Gunakan scheduler untuk menyesuaikan learning rate + scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer_finetune, T_max=EPOCHS, eta_min=1e-7) + + # E. 
Loop Training Utama dengan Regularisasi & Hyperparams + history = { + 'train_loss': [], 'train_acc': [], + 'valid_loss': [], 'valid_acc': [] + } + best_valid_acc = 0.0 + + for epoch in range(EPOCHS): + print(f"Epoch {epoch+1}/{EPOCHS}") + + train_loss, train_acc = train_one_epoch(model, train_loader, optimizer_finetune, criterion, DEVICE) + valid_loss, valid_acc = validate_one_epoch(model, valid_loader, criterion, DEVICE) + + scheduler.step() # Update learning rate + + print(f" Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}") + print(f" Valid Loss: {valid_loss:.4f}, Valid Acc: {valid_acc:.4f}") + + # Simpan riwayat + history['train_loss'].append(train_loss) + history['train_acc'].append(train_acc) + history['valid_loss'].append(valid_loss) + history['valid_acc'].append(valid_acc) + + # Simpan model dengan akurasi validasi terbaik (Early stopping sederhana) + if valid_acc > best_valid_acc: + print(f"Validasi akurasi meningkat dari {best_valid_acc:.4f} ke {valid_acc:.4f}. Menyimpan model...") + save_model(epoch, model, optimizer_finetune, criterion, f"{OUTPUT_DIR}/{MODEL_NAME}") + best_valid_acc = valid_acc + + # F. 
Evaluasi & Reporting + # Simpan plot akurasi dan loss + save_plots( + history['train_acc'], history['valid_acc'], + history['train_loss'], history['valid_loss'], + OUTPUT_DIR + ) + + print("\n--- Selesai ---") + print(f"Model terbaik disimpan di {OUTPUT_DIR}/{MODEL_NAME}") \ No newline at end of file diff --git a/.history/src/train_20250923140323.py b/.history/src/train_20250923140323.py new file mode 100644 index 0000000000000000000000000000000000000000..7b5c78a2bda7c1d263d5562b09eab283faa5fe9c --- /dev/null +++ b/.history/src/train_20250923140323.py @@ -0,0 +1,186 @@ +# src/train.py + +import torch +import torch.nn as nn +import torch.optim as optim +from tqdm import tqdm +import numpy as np +from sklearn.metrics import classification_report + +# Impor dari file lain dalam proyek +from model import create_model +from dataset import get_dataloaders +from utils import save_model, save_plots + +# --- 1. KONFIGURASI & HYPERPARAMETERS --- +# Sebagian besar diambil dari rekomendasi metodologi Anda +DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu' +DATA_DIR = '/data/split data' # Pastikan folder 'data' hasil prepare_dataset.py ada di sini +OUTPUT_DIR = '/outputs/models' # Folder untuk menyimpan model dan plot +IMAGE_SIZE = 224 # Ukuran input untuk ViT (bisa 224x224 atau 384x384) +BATCH_SIZE = 32 +EPOCHS = 50 # Total epoch, tapi kita akan pakai Early Stopping +LEARNING_RATE_HEAD = 1e-3 # LR untuk melatih classifier head saja +LEARNING_RATE_FINETUNE = 3e-5 # LR untuk fine-tuning seluruh model +WEIGHT_DECAY = 0.01 +MODEL_NAME = 'vit_mae_cataract_classifier.pth' +NUM_CLASSES = 4 # Sesuaikan dengan jumlah kelas katarak Anda + +# --- 2. FUNGSI UNTUK MENGATASI CLASS IMBALANCE --- +def calculate_class_weights(dataset): + """ + Menghitung bobot untuk setiap kelas berdasarkan frekuensinya (1 / sqrt(freq)). + Ini adalah implementasi dari "Weighted loss" di metodologi Anda. 
+ """ + # Hitung jumlah sampel per kelas + class_counts = np.bincount(dataset.targets) + # Hitung bobot: 1 / sqrt(frekuensi) + class_weights = 1. / np.sqrt(class_counts) + # Normalisasi bobot + class_weights = class_weights / np.sum(class_weights) + return torch.tensor(class_weights, dtype=torch.float).to(DEVICE) + +# --- 3. FUNGSI TRAINING & VALIDASI --- +def train_one_epoch(model, dataloader, optimizer, criterion, device): + model.train() + running_loss = 0.0 + correct_predictions = 0 + total_samples = 0 + + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Training"): + images, labels = images.to(device), labels.to(device) + + optimizer.zero_grad() + outputs = model(images) + loss = criterion(outputs, labels) + + loss.backward() + optimizer.step() + + running_loss += loss.item() * images.size(0) + _, preds = torch.max(outputs, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + return epoch_loss, epoch_acc + +def validate_one_epoch(model, dataloader, criterion, device): + model.eval() + running_loss = 0.0 + correct_predictions = 0 + total_samples = 0 + + all_preds = [] + all_labels = [] + + with torch.no_grad(): + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Validating"): + images, labels = images.to(device), labels.to(device) + + outputs = model(images) + loss = criterion(outputs, labels) + + running_loss += loss.item() * images.size(0) + _, preds = torch.max(outputs, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + all_preds.extend(preds.cpu().numpy()) + all_labels.extend(labels.cpu().numpy()) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + + # Cetak laporan klasifikasi (precision, recall, f1-score per kelas) + print("\n--- Laporan Klasifikasi 
Validasi ---") + print(classification_report(all_labels, all_preds, target_names=[str(i) for i in range(NUM_CLASSES)])) + + return epoch_loss, epoch_acc + +# --- 4. SCRIPT UTAMA --- +if __name__ == '__main__': + # A. Persiapan & Split Data + # Kita asumsikan ini sudah dilakukan oleh prepare_dataset.py + # get_dataloaders akan mengambil data dari folder 'data/train' dan 'data/valid' + train_loader, valid_loader, classes = get_dataloaders(DATA_DIR, BATCH_SIZE, IMAGE_SIZE) + + # B. Arsitektur & Pretraining + # Menggunakan ViT pretrained (MAE) dari model.py + model = create_model(num_classes=NUM_CLASSES).to(DEVICE) + + # C. Menangani Imbalance + # Hitung bobot kelas dari dataset training + class_weights = calculate_class_weights(train_loader.dataset) + print(f"Class weights untuk mengatasi imbalance: {class_weights}") + # Gunakan Weighted Cross-Entropy Loss + criterion = nn.CrossEntropyLoss(weight=class_weights) + + # D. Strategi Fine-tune + # --- TAHAP 1: FREEZE BACKBONE, LATIH HEAD --- + print("\n--- TAHAP 1: Melatih Classifier Head ---") + # Bekukan semua layer kecuali head + for param in model.parameters(): + param.requires_grad = False + for param in model.head.parameters(): + param.requires_grad = True + + optimizer_head = optim.AdamW(model.head.parameters(), lr=LEARNING_RATE_HEAD, weight_decay=WEIGHT_DECAY) + + # Latih head selama beberapa epoch + for epoch in range(5): # Misal 5 epoch untuk head + print(f"Epoch Head {epoch+1}/5") + train_one_epoch(model, train_loader, optimizer_head, criterion, DEVICE) + validate_one_epoch(model, valid_loader, criterion, DEVICE) + + # --- TAHAP 2: UNFREEZE & FINE-TUNE SELURUH MODEL --- + print("\n--- TAHAP 2: Fine-tuning Seluruh Model ---") + # Cairkan semua layer + for param in model.parameters(): + param.requires_grad = True + + optimizer_finetune = optim.AdamW(model.parameters(), lr=LEARNING_RATE_FINETUNE, weight_decay=WEIGHT_DECAY) + # Gunakan scheduler untuk menyesuaikan learning rate + scheduler = 
optim.lr_scheduler.CosineAnnealingLR(optimizer_finetune, T_max=EPOCHS, eta_min=1e-7) + + # E. Loop Training Utama dengan Regularisasi & Hyperparams + history = { + 'train_loss': [], 'train_acc': [], + 'valid_loss': [], 'valid_acc': [] + } + best_valid_acc = 0.0 + + for epoch in range(EPOCHS): + print(f"Epoch {epoch+1}/{EPOCHS}") + + train_loss, train_acc = train_one_epoch(model, train_loader, optimizer_finetune, criterion, DEVICE) + valid_loss, valid_acc = validate_one_epoch(model, valid_loader, criterion, DEVICE) + + scheduler.step() # Update learning rate + + print(f" Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}") + print(f" Valid Loss: {valid_loss:.4f}, Valid Acc: {valid_acc:.4f}") + + # Simpan riwayat + history['train_loss'].append(train_loss) + history['train_acc'].append(train_acc) + history['valid_loss'].append(valid_loss) + history['valid_acc'].append(valid_acc) + + # Simpan model dengan akurasi validasi terbaik (Early stopping sederhana) + if valid_acc > best_valid_acc: + print(f"Validasi akurasi meningkat dari {best_valid_acc:.4f} ke {valid_acc:.4f}. Menyimpan model...") + save_model(epoch, model, optimizer_finetune, criterion, f"{OUTPUT_DIR}/{MODEL_NAME}") + best_valid_acc = valid_acc + + # F. 
Evaluasi & Reporting + # Simpan plot akurasi dan loss + save_plots( + history['train_acc'], history['valid_acc'], + history['train_loss'], history['valid_loss'], + OUTPUT_DIR + ) + + print("\n--- Selesai ---") + print(f"Model terbaik disimpan di {OUTPUT_DIR}/{MODEL_NAME}") \ No newline at end of file diff --git a/.history/src/train_20250923143438.py b/.history/src/train_20250923143438.py new file mode 100644 index 0000000000000000000000000000000000000000..9ea946d8175b0e1e52b90d0173ba50408b76dcaa --- /dev/null +++ b/.history/src/train_20250923143438.py @@ -0,0 +1,186 @@ +# src/train.py + +import torch +import torch.nn as nn +import torch.optim as optim +from tqdm import tqdm +import numpy as np +from sklearn.metrics import classification_report + +# Impor dari file lain dalam proyek +from model import create_model +from dataset import get_dataloaders +from utils import save_model, save_plots + +# --- 1. KONFIGURASI & HYPERPARAMETERS --- +# Sebagian besar diambil dari rekomendasi metodologi Anda +DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu' +DATA_DIR = '/data/split data' # Pastikan folder 'data' hasil prepare_dataset.py ada di sini +OUTPUT_DIR = '/outputs/models' # Folder untuk menyimpan model dan plot +IMAGE_SIZE = 224 # Ukuran input untuk ViT (bisa 224x224 atau 384x384) +BATCH_SIZE = 16 +EPOCHS = 50 # Total epoch, tapi kita akan pakai Early Stopping +LEARNING_RATE_HEAD = 1e-3 # LR untuk melatih classifier head saja +LEARNING_RATE_FINETUNE = 3e-5 # LR untuk fine-tuning seluruh model +WEIGHT_DECAY = 0.01 +MODEL_NAME = 'vit_mae_cataract_classifier.pth' +NUM_CLASSES = 4 # Sesuaikan dengan jumlah kelas katarak Anda + +# --- 2. FUNGSI UNTUK MENGATASI CLASS IMBALANCE --- +def calculate_class_weights(dataset): + """ + Menghitung bobot untuk setiap kelas berdasarkan frekuensinya (1 / sqrt(freq)). + Ini adalah implementasi dari "Weighted loss" di metodologi Anda. 
+ """ + # Hitung jumlah sampel per kelas + class_counts = np.bincount(dataset.targets) + # Hitung bobot: 1 / sqrt(frekuensi) + class_weights = 1. / np.sqrt(class_counts) + # Normalisasi bobot + class_weights = class_weights / np.sum(class_weights) + return torch.tensor(class_weights, dtype=torch.float).to(DEVICE) + +# --- 3. FUNGSI TRAINING & VALIDASI --- +def train_one_epoch(model, dataloader, optimizer, criterion, device): + model.train() + running_loss = 0.0 + correct_predictions = 0 + total_samples = 0 + + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Training"): + images, labels = images.to(device), labels.to(device) + + optimizer.zero_grad() + outputs = model(images) + loss = criterion(outputs, labels) + + loss.backward() + optimizer.step() + + running_loss += loss.item() * images.size(0) + _, preds = torch.max(outputs, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + return epoch_loss, epoch_acc + +def validate_one_epoch(model, dataloader, criterion, device): + model.eval() + running_loss = 0.0 + correct_predictions = 0 + total_samples = 0 + + all_preds = [] + all_labels = [] + + with torch.no_grad(): + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Validating"): + images, labels = images.to(device), labels.to(device) + + outputs = model(images) + loss = criterion(outputs, labels) + + running_loss += loss.item() * images.size(0) + _, preds = torch.max(outputs, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + all_preds.extend(preds.cpu().numpy()) + all_labels.extend(labels.cpu().numpy()) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + + # Cetak laporan klasifikasi (precision, recall, f1-score per kelas) + print("\n--- Laporan Klasifikasi 
Validasi ---") + print(classification_report(all_labels, all_preds, target_names=[str(i) for i in range(NUM_CLASSES)])) + + return epoch_loss, epoch_acc + +# --- 4. SCRIPT UTAMA --- +if __name__ == '__main__': + # A. Persiapan & Split Data + # Kita asumsikan ini sudah dilakukan oleh prepare_dataset.py + # get_dataloaders akan mengambil data dari folder 'data/train' dan 'data/valid' + train_loader, valid_loader, classes = get_dataloaders(DATA_DIR, BATCH_SIZE, IMAGE_SIZE) + + # B. Arsitektur & Pretraining + # Menggunakan ViT pretrained (MAE) dari model.py + model = create_model(num_classes=NUM_CLASSES).to(DEVICE) + + # C. Menangani Imbalance + # Hitung bobot kelas dari dataset training + class_weights = calculate_class_weights(train_loader.dataset) + print(f"Class weights untuk mengatasi imbalance: {class_weights}") + # Gunakan Weighted Cross-Entropy Loss + criterion = nn.CrossEntropyLoss(weight=class_weights) + + # D. Strategi Fine-tune + # --- TAHAP 1: FREEZE BACKBONE, LATIH HEAD --- + print("\n--- TAHAP 1: Melatih Classifier Head ---") + # Bekukan semua layer kecuali head + for param in model.parameters(): + param.requires_grad = False + for param in model.head.parameters(): + param.requires_grad = True + + optimizer_head = optim.AdamW(model.head.parameters(), lr=LEARNING_RATE_HEAD, weight_decay=WEIGHT_DECAY) + + # Latih head selama beberapa epoch + for epoch in range(5): # Misal 5 epoch untuk head + print(f"Epoch Head {epoch+1}/5") + train_one_epoch(model, train_loader, optimizer_head, criterion, DEVICE) + validate_one_epoch(model, valid_loader, criterion, DEVICE) + + # --- TAHAP 2: UNFREEZE & FINE-TUNE SELURUH MODEL --- + print("\n--- TAHAP 2: Fine-tuning Seluruh Model ---") + # Cairkan semua layer + for param in model.parameters(): + param.requires_grad = True + + optimizer_finetune = optim.AdamW(model.parameters(), lr=LEARNING_RATE_FINETUNE, weight_decay=WEIGHT_DECAY) + # Gunakan scheduler untuk menyesuaikan learning rate + scheduler = 
optim.lr_scheduler.CosineAnnealingLR(optimizer_finetune, T_max=EPOCHS, eta_min=1e-7) + + # E. Loop Training Utama dengan Regularisasi & Hyperparams + history = { + 'train_loss': [], 'train_acc': [], + 'valid_loss': [], 'valid_acc': [] + } + best_valid_acc = 0.0 + + for epoch in range(EPOCHS): + print(f"Epoch {epoch+1}/{EPOCHS}") + + train_loss, train_acc = train_one_epoch(model, train_loader, optimizer_finetune, criterion, DEVICE) + valid_loss, valid_acc = validate_one_epoch(model, valid_loader, criterion, DEVICE) + + scheduler.step() # Update learning rate + + print(f" Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}") + print(f" Valid Loss: {valid_loss:.4f}, Valid Acc: {valid_acc:.4f}") + + # Simpan riwayat + history['train_loss'].append(train_loss) + history['train_acc'].append(train_acc) + history['valid_loss'].append(valid_loss) + history['valid_acc'].append(valid_acc) + + # Simpan model dengan akurasi validasi terbaik (Early stopping sederhana) + if valid_acc > best_valid_acc: + print(f"Validasi akurasi meningkat dari {best_valid_acc:.4f} ke {valid_acc:.4f}. Menyimpan model...") + save_model(epoch, model, optimizer_finetune, criterion, f"{OUTPUT_DIR}/{MODEL_NAME}") + best_valid_acc = valid_acc + + # F. 
Evaluasi & Reporting + # Simpan plot akurasi dan loss + save_plots( + history['train_acc'], history['valid_acc'], + history['train_loss'], history['valid_loss'], + OUTPUT_DIR + ) + + print("\n--- Selesai ---") + print(f"Model terbaik disimpan di {OUTPUT_DIR}/{MODEL_NAME}") \ No newline at end of file diff --git a/.history/src/train_20250923144927.py b/.history/src/train_20250923144927.py new file mode 100644 index 0000000000000000000000000000000000000000..d52a263725526f320661df7f2fdee018b7d8e8b8 --- /dev/null +++ b/.history/src/train_20250923144927.py @@ -0,0 +1,186 @@ +# src/train.py + +import torch +import torch.nn as nn +import torch.optim as optim +from tqdm import tqdm +import numpy as np +from sklearn.metrics import classification_report + +# Impor dari file lain dalam proyek +from model import create_model +from dataset import get_dataloaders +from utils import save_model, save_plots + +# --- 1. KONFIGURASI & HYPERPARAMETERS --- +# Sebagian besar diambil dari rekomendasi metodologi Anda +DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu' +DATA_DIR = 'data/split data' # Pastikan folder 'data' hasil prepare_dataset.py ada di sini +OUTPUT_DIR = '/outputs/models' # Folder untuk menyimpan model dan plot +IMAGE_SIZE = 224 # Ukuran input untuk ViT (bisa 224x224 atau 384x384) +BATCH_SIZE = 16 +EPOCHS = 50 # Total epoch, tapi kita akan pakai Early Stopping +LEARNING_RATE_HEAD = 1e-3 # LR untuk melatih classifier head saja +LEARNING_RATE_FINETUNE = 3e-5 # LR untuk fine-tuning seluruh model +WEIGHT_DECAY = 0.01 +MODEL_NAME = 'vit_mae_cataract_classifier.pth' +NUM_CLASSES = 4 # Sesuaikan dengan jumlah kelas katarak Anda + +# --- 2. FUNGSI UNTUK MENGATASI CLASS IMBALANCE --- +def calculate_class_weights(dataset): + """ + Menghitung bobot untuk setiap kelas berdasarkan frekuensinya (1 / sqrt(freq)). + Ini adalah implementasi dari "Weighted loss" di metodologi Anda. 
+ """ + # Hitung jumlah sampel per kelas + class_counts = np.bincount(dataset.targets) + # Hitung bobot: 1 / sqrt(frekuensi) + class_weights = 1. / np.sqrt(class_counts) + # Normalisasi bobot + class_weights = class_weights / np.sum(class_weights) + return torch.tensor(class_weights, dtype=torch.float).to(DEVICE) + +# --- 3. FUNGSI TRAINING & VALIDASI --- +def train_one_epoch(model, dataloader, optimizer, criterion, device): + model.train() + running_loss = 0.0 + correct_predictions = 0 + total_samples = 0 + + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Training"): + images, labels = images.to(device), labels.to(device) + + optimizer.zero_grad() + outputs = model(images) + loss = criterion(outputs, labels) + + loss.backward() + optimizer.step() + + running_loss += loss.item() * images.size(0) + _, preds = torch.max(outputs, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + return epoch_loss, epoch_acc + +def validate_one_epoch(model, dataloader, criterion, device): + model.eval() + running_loss = 0.0 + correct_predictions = 0 + total_samples = 0 + + all_preds = [] + all_labels = [] + + with torch.no_grad(): + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Validating"): + images, labels = images.to(device), labels.to(device) + + outputs = model(images) + loss = criterion(outputs, labels) + + running_loss += loss.item() * images.size(0) + _, preds = torch.max(outputs, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + all_preds.extend(preds.cpu().numpy()) + all_labels.extend(labels.cpu().numpy()) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + + # Cetak laporan klasifikasi (precision, recall, f1-score per kelas) + print("\n--- Laporan Klasifikasi 
Validasi ---") + print(classification_report(all_labels, all_preds, target_names=[str(i) for i in range(NUM_CLASSES)])) + + return epoch_loss, epoch_acc + +# --- 4. SCRIPT UTAMA --- +if __name__ == '__main__': + # A. Persiapan & Split Data + # Kita asumsikan ini sudah dilakukan oleh prepare_dataset.py + # get_dataloaders akan mengambil data dari folder 'data/train' dan 'data/valid' + train_loader, valid_loader, classes = get_dataloaders(DATA_DIR, BATCH_SIZE, IMAGE_SIZE) + + # B. Arsitektur & Pretraining + # Menggunakan ViT pretrained (MAE) dari model.py + model = create_model(num_classes=NUM_CLASSES).to(DEVICE) + + # C. Menangani Imbalance + # Hitung bobot kelas dari dataset training + class_weights = calculate_class_weights(train_loader.dataset) + print(f"Class weights untuk mengatasi imbalance: {class_weights}") + # Gunakan Weighted Cross-Entropy Loss + criterion = nn.CrossEntropyLoss(weight=class_weights) + + # D. Strategi Fine-tune + # --- TAHAP 1: FREEZE BACKBONE, LATIH HEAD --- + print("\n--- TAHAP 1: Melatih Classifier Head ---") + # Bekukan semua layer kecuali head + for param in model.parameters(): + param.requires_grad = False + for param in model.head.parameters(): + param.requires_grad = True + + optimizer_head = optim.AdamW(model.head.parameters(), lr=LEARNING_RATE_HEAD, weight_decay=WEIGHT_DECAY) + + # Latih head selama beberapa epoch + for epoch in range(5): # Misal 5 epoch untuk head + print(f"Epoch Head {epoch+1}/5") + train_one_epoch(model, train_loader, optimizer_head, criterion, DEVICE) + validate_one_epoch(model, valid_loader, criterion, DEVICE) + + # --- TAHAP 2: UNFREEZE & FINE-TUNE SELURUH MODEL --- + print("\n--- TAHAP 2: Fine-tuning Seluruh Model ---") + # Cairkan semua layer + for param in model.parameters(): + param.requires_grad = True + + optimizer_finetune = optim.AdamW(model.parameters(), lr=LEARNING_RATE_FINETUNE, weight_decay=WEIGHT_DECAY) + # Gunakan scheduler untuk menyesuaikan learning rate + scheduler = 
optim.lr_scheduler.CosineAnnealingLR(optimizer_finetune, T_max=EPOCHS, eta_min=1e-7) + + # E. Loop Training Utama dengan Regularisasi & Hyperparams + history = { + 'train_loss': [], 'train_acc': [], + 'valid_loss': [], 'valid_acc': [] + } + best_valid_acc = 0.0 + + for epoch in range(EPOCHS): + print(f"Epoch {epoch+1}/{EPOCHS}") + + train_loss, train_acc = train_one_epoch(model, train_loader, optimizer_finetune, criterion, DEVICE) + valid_loss, valid_acc = validate_one_epoch(model, valid_loader, criterion, DEVICE) + + scheduler.step() # Update learning rate + + print(f" Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}") + print(f" Valid Loss: {valid_loss:.4f}, Valid Acc: {valid_acc:.4f}") + + # Simpan riwayat + history['train_loss'].append(train_loss) + history['train_acc'].append(train_acc) + history['valid_loss'].append(valid_loss) + history['valid_acc'].append(valid_acc) + + # Simpan model dengan akurasi validasi terbaik (Early stopping sederhana) + if valid_acc > best_valid_acc: + print(f"Validasi akurasi meningkat dari {best_valid_acc:.4f} ke {valid_acc:.4f}. Menyimpan model...") + save_model(epoch, model, optimizer_finetune, criterion, f"{OUTPUT_DIR}/{MODEL_NAME}") + best_valid_acc = valid_acc + + # F. 
Evaluasi & Reporting + # Simpan plot akurasi dan loss + save_plots( + history['train_acc'], history['valid_acc'], + history['train_loss'], history['valid_loss'], + OUTPUT_DIR + ) + + print("\n--- Selesai ---") + print(f"Model terbaik disimpan di {OUTPUT_DIR}/{MODEL_NAME}") \ No newline at end of file diff --git a/.history/src/train_20250923144931.py b/.history/src/train_20250923144931.py new file mode 100644 index 0000000000000000000000000000000000000000..98f38106208d9589fec6878029cfe7b8adea3328 --- /dev/null +++ b/.history/src/train_20250923144931.py @@ -0,0 +1,186 @@ +# src/train.py + +import torch +import torch.nn as nn +import torch.optim as optim +from tqdm import tqdm +import numpy as np +from sklearn.metrics import classification_report + +# Impor dari file lain dalam proyek +from model import create_model +from dataset import get_dataloaders +from utils import save_model, save_plots + +# --- 1. KONFIGURASI & HYPERPARAMETERS --- +# Sebagian besar diambil dari rekomendasi metodologi Anda +DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu' +DATA_DIR = 'data/split data' # Pastikan folder 'data' hasil prepare_dataset.py ada di sini +OUTPUT_DIR = 'outputs/models' # Folder untuk menyimpan model dan plot +IMAGE_SIZE = 224 # Ukuran input untuk ViT (bisa 224x224 atau 384x384) +BATCH_SIZE = 16 +EPOCHS = 50 # Total epoch, tapi kita akan pakai Early Stopping +LEARNING_RATE_HEAD = 1e-3 # LR untuk melatih classifier head saja +LEARNING_RATE_FINETUNE = 3e-5 # LR untuk fine-tuning seluruh model +WEIGHT_DECAY = 0.01 +MODEL_NAME = 'vit_mae_cataract_classifier.pth' +NUM_CLASSES = 4 # Sesuaikan dengan jumlah kelas katarak Anda + +# --- 2. FUNGSI UNTUK MENGATASI CLASS IMBALANCE --- +def calculate_class_weights(dataset): + """ + Menghitung bobot untuk setiap kelas berdasarkan frekuensinya (1 / sqrt(freq)). + Ini adalah implementasi dari "Weighted loss" di metodologi Anda. 
+ """ + # Hitung jumlah sampel per kelas + class_counts = np.bincount(dataset.targets) + # Hitung bobot: 1 / sqrt(frekuensi) + class_weights = 1. / np.sqrt(class_counts) + # Normalisasi bobot + class_weights = class_weights / np.sum(class_weights) + return torch.tensor(class_weights, dtype=torch.float).to(DEVICE) + +# --- 3. FUNGSI TRAINING & VALIDASI --- +def train_one_epoch(model, dataloader, optimizer, criterion, device): + model.train() + running_loss = 0.0 + correct_predictions = 0 + total_samples = 0 + + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Training"): + images, labels = images.to(device), labels.to(device) + + optimizer.zero_grad() + outputs = model(images) + loss = criterion(outputs, labels) + + loss.backward() + optimizer.step() + + running_loss += loss.item() * images.size(0) + _, preds = torch.max(outputs, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + return epoch_loss, epoch_acc + +def validate_one_epoch(model, dataloader, criterion, device): + model.eval() + running_loss = 0.0 + correct_predictions = 0 + total_samples = 0 + + all_preds = [] + all_labels = [] + + with torch.no_grad(): + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Validating"): + images, labels = images.to(device), labels.to(device) + + outputs = model(images) + loss = criterion(outputs, labels) + + running_loss += loss.item() * images.size(0) + _, preds = torch.max(outputs, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + all_preds.extend(preds.cpu().numpy()) + all_labels.extend(labels.cpu().numpy()) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + + # Cetak laporan klasifikasi (precision, recall, f1-score per kelas) + print("\n--- Laporan Klasifikasi 
Validasi ---") + print(classification_report(all_labels, all_preds, target_names=[str(i) for i in range(NUM_CLASSES)])) + + return epoch_loss, epoch_acc + +# --- 4. SCRIPT UTAMA --- +if __name__ == '__main__': + # A. Persiapan & Split Data + # Kita asumsikan ini sudah dilakukan oleh prepare_dataset.py + # get_dataloaders akan mengambil data dari folder 'data/train' dan 'data/valid' + train_loader, valid_loader, classes = get_dataloaders(DATA_DIR, BATCH_SIZE, IMAGE_SIZE) + + # B. Arsitektur & Pretraining + # Menggunakan ViT pretrained (MAE) dari model.py + model = create_model(num_classes=NUM_CLASSES).to(DEVICE) + + # C. Menangani Imbalance + # Hitung bobot kelas dari dataset training + class_weights = calculate_class_weights(train_loader.dataset) + print(f"Class weights untuk mengatasi imbalance: {class_weights}") + # Gunakan Weighted Cross-Entropy Loss + criterion = nn.CrossEntropyLoss(weight=class_weights) + + # D. Strategi Fine-tune + # --- TAHAP 1: FREEZE BACKBONE, LATIH HEAD --- + print("\n--- TAHAP 1: Melatih Classifier Head ---") + # Bekukan semua layer kecuali head + for param in model.parameters(): + param.requires_grad = False + for param in model.head.parameters(): + param.requires_grad = True + + optimizer_head = optim.AdamW(model.head.parameters(), lr=LEARNING_RATE_HEAD, weight_decay=WEIGHT_DECAY) + + # Latih head selama beberapa epoch + for epoch in range(5): # Misal 5 epoch untuk head + print(f"Epoch Head {epoch+1}/5") + train_one_epoch(model, train_loader, optimizer_head, criterion, DEVICE) + validate_one_epoch(model, valid_loader, criterion, DEVICE) + + # --- TAHAP 2: UNFREEZE & FINE-TUNE SELURUH MODEL --- + print("\n--- TAHAP 2: Fine-tuning Seluruh Model ---") + # Cairkan semua layer + for param in model.parameters(): + param.requires_grad = True + + optimizer_finetune = optim.AdamW(model.parameters(), lr=LEARNING_RATE_FINETUNE, weight_decay=WEIGHT_DECAY) + # Gunakan scheduler untuk menyesuaikan learning rate + scheduler = 
optim.lr_scheduler.CosineAnnealingLR(optimizer_finetune, T_max=EPOCHS, eta_min=1e-7) + + # E. Loop Training Utama dengan Regularisasi & Hyperparams + history = { + 'train_loss': [], 'train_acc': [], + 'valid_loss': [], 'valid_acc': [] + } + best_valid_acc = 0.0 + + for epoch in range(EPOCHS): + print(f"Epoch {epoch+1}/{EPOCHS}") + + train_loss, train_acc = train_one_epoch(model, train_loader, optimizer_finetune, criterion, DEVICE) + valid_loss, valid_acc = validate_one_epoch(model, valid_loader, criterion, DEVICE) + + scheduler.step() # Update learning rate + + print(f" Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}") + print(f" Valid Loss: {valid_loss:.4f}, Valid Acc: {valid_acc:.4f}") + + # Simpan riwayat + history['train_loss'].append(train_loss) + history['train_acc'].append(train_acc) + history['valid_loss'].append(valid_loss) + history['valid_acc'].append(valid_acc) + + # Simpan model dengan akurasi validasi terbaik (Early stopping sederhana) + if valid_acc > best_valid_acc: + print(f"Validasi akurasi meningkat dari {best_valid_acc:.4f} ke {valid_acc:.4f}. Menyimpan model...") + save_model(epoch, model, optimizer_finetune, criterion, f"{OUTPUT_DIR}/{MODEL_NAME}") + best_valid_acc = valid_acc + + # F. 
Evaluasi & Reporting + # Simpan plot akurasi dan loss + save_plots( + history['train_acc'], history['valid_acc'], + history['train_loss'], history['valid_loss'], + OUTPUT_DIR + ) + + print("\n--- Selesai ---") + print(f"Model terbaik disimpan di {OUTPUT_DIR}/{MODEL_NAME}") \ No newline at end of file diff --git a/.history/src/train_20250924193831.py b/.history/src/train_20250924193831.py new file mode 100644 index 0000000000000000000000000000000000000000..47f2769e87dbaff4b75799e1f6680bb63ca7cbfe --- /dev/null +++ b/.history/src/train_20250924193831.py @@ -0,0 +1,186 @@ +# src/train.py + +import torch +import torch.nn as nn +import torch.optim as optim +from tqdm import tqdm +import numpy as np +from sklearn.metrics import classification_report + +# Impor dari file lain dalam proyek +from model import create_model +from dataset import get_dataloaders +from utils import save_model, save_plots + +# --- 1. KONFIGURASI & HYPERPARAMETERS --- +# Sebagian besar diambil dari rekomendasi metodologi Anda +DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu' +DATA_DIR = 'data' # Pastikan folder 'data' hasil prepare_dataset.py ada di sini +OUTPUT_DIR = 'outputs/models' # Folder untuk menyimpan model dan plot +IMAGE_SIZE = 224 # Ukuran input untuk ViT (bisa 224x224 atau 384x384) +BATCH_SIZE = 16 +EPOCHS = 50 # Total epoch, tapi kita akan pakai Early Stopping +LEARNING_RATE_HEAD = 1e-3 # LR untuk melatih classifier head saja +LEARNING_RATE_FINETUNE = 3e-5 # LR untuk fine-tuning seluruh model +WEIGHT_DECAY = 0.01 +MODEL_NAME = 'vit_mae_cataract_classifier.pth' +NUM_CLASSES = 4 # Sesuaikan dengan jumlah kelas katarak Anda + +# --- 2. FUNGSI UNTUK MENGATASI CLASS IMBALANCE --- +def calculate_class_weights(dataset): + """ + Menghitung bobot untuk setiap kelas berdasarkan frekuensinya (1 / sqrt(freq)). + Ini adalah implementasi dari "Weighted loss" di metodologi Anda. 
+ """ + # Hitung jumlah sampel per kelas + class_counts = np.bincount(dataset.targets) + # Hitung bobot: 1 / sqrt(frekuensi) + class_weights = 1. / np.sqrt(class_counts) + # Normalisasi bobot + class_weights = class_weights / np.sum(class_weights) + return torch.tensor(class_weights, dtype=torch.float).to(DEVICE) + +# --- 3. FUNGSI TRAINING & VALIDASI --- +def train_one_epoch(model, dataloader, optimizer, criterion, device): + model.train() + running_loss = 0.0 + correct_predictions = 0 + total_samples = 0 + + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Training"): + images, labels = images.to(device), labels.to(device) + + optimizer.zero_grad() + outputs = model(images) + loss = criterion(outputs, labels) + + loss.backward() + optimizer.step() + + running_loss += loss.item() * images.size(0) + _, preds = torch.max(outputs, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + return epoch_loss, epoch_acc + +def validate_one_epoch(model, dataloader, criterion, device): + model.eval() + running_loss = 0.0 + correct_predictions = 0 + total_samples = 0 + + all_preds = [] + all_labels = [] + + with torch.no_grad(): + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Validating"): + images, labels = images.to(device), labels.to(device) + + outputs = model(images) + loss = criterion(outputs, labels) + + running_loss += loss.item() * images.size(0) + _, preds = torch.max(outputs, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + all_preds.extend(preds.cpu().numpy()) + all_labels.extend(labels.cpu().numpy()) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + + # Cetak laporan klasifikasi (precision, recall, f1-score per kelas) + print("\n--- Laporan Klasifikasi 
Validasi ---") + print(classification_report(all_labels, all_preds, target_names=[str(i) for i in range(NUM_CLASSES)])) + + return epoch_loss, epoch_acc + +# --- 4. SCRIPT UTAMA --- +if __name__ == '__main__': + # A. Persiapan & Split Data + # Kita asumsikan ini sudah dilakukan oleh prepare_dataset.py + # get_dataloaders akan mengambil data dari folder 'data/train' dan 'data/valid' + train_loader, valid_loader, classes = get_dataloaders(DATA_DIR, BATCH_SIZE, IMAGE_SIZE) + + # B. Arsitektur & Pretraining + # Menggunakan ViT pretrained (MAE) dari model.py + model = create_model(num_classes=NUM_CLASSES).to(DEVICE) + + # C. Menangani Imbalance + # Hitung bobot kelas dari dataset training + class_weights = calculate_class_weights(train_loader.dataset) + print(f"Class weights untuk mengatasi imbalance: {class_weights}") + # Gunakan Weighted Cross-Entropy Loss + criterion = nn.CrossEntropyLoss(weight=class_weights) + + # D. Strategi Fine-tune + # --- TAHAP 1: FREEZE BACKBONE, LATIH HEAD --- + print("\n--- TAHAP 1: Melatih Classifier Head ---") + # Bekukan semua layer kecuali head + for param in model.parameters(): + param.requires_grad = False + for param in model.head.parameters(): + param.requires_grad = True + + optimizer_head = optim.AdamW(model.head.parameters(), lr=LEARNING_RATE_HEAD, weight_decay=WEIGHT_DECAY) + + # Latih head selama beberapa epoch + for epoch in range(5): # Misal 5 epoch untuk head + print(f"Epoch Head {epoch+1}/5") + train_one_epoch(model, train_loader, optimizer_head, criterion, DEVICE) + validate_one_epoch(model, valid_loader, criterion, DEVICE) + + # --- TAHAP 2: UNFREEZE & FINE-TUNE SELURUH MODEL --- + print("\n--- TAHAP 2: Fine-tuning Seluruh Model ---") + # Cairkan semua layer + for param in model.parameters(): + param.requires_grad = True + + optimizer_finetune = optim.AdamW(model.parameters(), lr=LEARNING_RATE_FINETUNE, weight_decay=WEIGHT_DECAY) + # Gunakan scheduler untuk menyesuaikan learning rate + scheduler = 
optim.lr_scheduler.CosineAnnealingLR(optimizer_finetune, T_max=EPOCHS, eta_min=1e-7) + + # E. Loop Training Utama dengan Regularisasi & Hyperparams + history = { + 'train_loss': [], 'train_acc': [], + 'valid_loss': [], 'valid_acc': [] + } + best_valid_acc = 0.0 + + for epoch in range(EPOCHS): + print(f"Epoch {epoch+1}/{EPOCHS}") + + train_loss, train_acc = train_one_epoch(model, train_loader, optimizer_finetune, criterion, DEVICE) + valid_loss, valid_acc = validate_one_epoch(model, valid_loader, criterion, DEVICE) + + scheduler.step() # Update learning rate + + print(f" Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}") + print(f" Valid Loss: {valid_loss:.4f}, Valid Acc: {valid_acc:.4f}") + + # Simpan riwayat + history['train_loss'].append(train_loss) + history['train_acc'].append(train_acc) + history['valid_loss'].append(valid_loss) + history['valid_acc'].append(valid_acc) + + # Simpan model dengan akurasi validasi terbaik (Early stopping sederhana) + if valid_acc > best_valid_acc: + print(f"Validasi akurasi meningkat dari {best_valid_acc:.4f} ke {valid_acc:.4f}. Menyimpan model...") + save_model(epoch, model, optimizer_finetune, criterion, f"{OUTPUT_DIR}/{MODEL_NAME}") + best_valid_acc = valid_acc + + # F. 
Evaluasi & Reporting + # Simpan plot akurasi dan loss + save_plots( + history['train_acc'], history['valid_acc'], + history['train_loss'], history['valid_loss'], + OUTPUT_DIR + ) + + print("\n--- Selesai ---") + print(f"Model terbaik disimpan di {OUTPUT_DIR}/{MODEL_NAME}") \ No newline at end of file diff --git a/.history/src/train_20250924202508.py b/.history/src/train_20250924202508.py new file mode 100644 index 0000000000000000000000000000000000000000..f85736322162aa87c93481627847ee1989d7ac2b --- /dev/null +++ b/.history/src/train_20250924202508.py @@ -0,0 +1,189 @@ +# src/train.py + +import torch +import torch.nn as nn +import torch.optim as optim +from tqdm import tqdm +import numpy as np +import seaborn as sns +from sklearn.metrics import confusion_matrix + +from sklearn.metrics import classification_report + +# Impor dari file lain dalam proyek +from model import create_model +from dataset import get_dataloaders +from utils import save_model, save_plots + +# --- 1. KONFIGURASI & HYPERPARAMETERS --- +# Sebagian besar diambil dari rekomendasi metodologi Anda +DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu' +DATA_DIR = 'data' # Pastikan folder 'data' hasil prepare_dataset.py ada di sini +OUTPUT_DIR = 'outputs/models' # Folder untuk menyimpan model dan plot +IMAGE_SIZE = 224 # Ukuran input untuk ViT (bisa 224x224 atau 384x384) +BATCH_SIZE = 16 +EPOCHS = 50 # Total epoch, tapi kita akan pakai Early Stopping +LEARNING_RATE_HEAD = 1e-3 # LR untuk melatih classifier head saja +LEARNING_RATE_FINETUNE = 3e-5 # LR untuk fine-tuning seluruh model +WEIGHT_DECAY = 0.01 +MODEL_NAME = 'vit_mae_cataract_classifier.pth' +NUM_CLASSES = 4 # Sesuaikan dengan jumlah kelas katarak Anda + +# --- 2. FUNGSI UNTUK MENGATASI CLASS IMBALANCE --- +def calculate_class_weights(dataset): + """ + Menghitung bobot untuk setiap kelas berdasarkan frekuensinya (1 / sqrt(freq)). + Ini adalah implementasi dari "Weighted loss" di metodologi Anda. 
+ """ + # Hitung jumlah sampel per kelas + class_counts = np.bincount(dataset.targets) + # Hitung bobot: 1 / sqrt(frekuensi) + class_weights = 1. / np.sqrt(class_counts) + # Normalisasi bobot + class_weights = class_weights / np.sum(class_weights) + return torch.tensor(class_weights, dtype=torch.float).to(DEVICE) + +# --- 3. FUNGSI TRAINING & VALIDASI --- +def train_one_epoch(model, dataloader, optimizer, criterion, device): + model.train() + running_loss = 0.0 + correct_predictions = 0 + total_samples = 0 + + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Training"): + images, labels = images.to(device), labels.to(device) + + optimizer.zero_grad() + outputs = model(images) + loss = criterion(outputs, labels) + + loss.backward() + optimizer.step() + + running_loss += loss.item() * images.size(0) + _, preds = torch.max(outputs, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + return epoch_loss, epoch_acc + +def validate_one_epoch(model, dataloader, criterion, device): + model.eval() + running_loss = 0.0 + correct_predictions = 0 + total_samples = 0 + + all_preds = [] + all_labels = [] + + with torch.no_grad(): + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Validating"): + images, labels = images.to(device), labels.to(device) + + outputs = model(images) + loss = criterion(outputs, labels) + + running_loss += loss.item() * images.size(0) + _, preds = torch.max(outputs, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + all_preds.extend(preds.cpu().numpy()) + all_labels.extend(labels.cpu().numpy()) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + + # Cetak laporan klasifikasi (precision, recall, f1-score per kelas) + print("\n--- Laporan Klasifikasi 
Validasi ---") + print(classification_report(all_labels, all_preds, target_names=[str(i) for i in range(NUM_CLASSES)])) + + return epoch_loss, epoch_acc + +# --- 4. SCRIPT UTAMA --- +if __name__ == '__main__': + # A. Persiapan & Split Data + # Kita asumsikan ini sudah dilakukan oleh prepare_dataset.py + # get_dataloaders akan mengambil data dari folder 'data/train' dan 'data/valid' + train_loader, valid_loader, classes = get_dataloaders(DATA_DIR, BATCH_SIZE, IMAGE_SIZE) + + # B. Arsitektur & Pretraining + # Menggunakan ViT pretrained (MAE) dari model.py + model = create_model(num_classes=NUM_CLASSES).to(DEVICE) + + # C. Menangani Imbalance + # Hitung bobot kelas dari dataset training + class_weights = calculate_class_weights(train_loader.dataset) + print(f"Class weights untuk mengatasi imbalance: {class_weights}") + # Gunakan Weighted Cross-Entropy Loss + criterion = nn.CrossEntropyLoss(weight=class_weights) + + # D. Strategi Fine-tune + # --- TAHAP 1: FREEZE BACKBONE, LATIH HEAD --- + print("\n--- TAHAP 1: Melatih Classifier Head ---") + # Bekukan semua layer kecuali head + for param in model.parameters(): + param.requires_grad = False + for param in model.head.parameters(): + param.requires_grad = True + + optimizer_head = optim.AdamW(model.head.parameters(), lr=LEARNING_RATE_HEAD, weight_decay=WEIGHT_DECAY) + + # Latih head selama beberapa epoch + for epoch in range(5): # Misal 5 epoch untuk head + print(f"Epoch Head {epoch+1}/5") + train_one_epoch(model, train_loader, optimizer_head, criterion, DEVICE) + validate_one_epoch(model, valid_loader, criterion, DEVICE) + + # --- TAHAP 2: UNFREEZE & FINE-TUNE SELURUH MODEL --- + print("\n--- TAHAP 2: Fine-tuning Seluruh Model ---") + # Cairkan semua layer + for param in model.parameters(): + param.requires_grad = True + + optimizer_finetune = optim.AdamW(model.parameters(), lr=LEARNING_RATE_FINETUNE, weight_decay=WEIGHT_DECAY) + # Gunakan scheduler untuk menyesuaikan learning rate + scheduler = 
optim.lr_scheduler.CosineAnnealingLR(optimizer_finetune, T_max=EPOCHS, eta_min=1e-7) + + # E. Loop Training Utama dengan Regularisasi & Hyperparams + history = { + 'train_loss': [], 'train_acc': [], + 'valid_loss': [], 'valid_acc': [] + } + best_valid_acc = 0.0 + + for epoch in range(EPOCHS): + print(f"Epoch {epoch+1}/{EPOCHS}") + + train_loss, train_acc = train_one_epoch(model, train_loader, optimizer_finetune, criterion, DEVICE) + valid_loss, valid_acc = validate_one_epoch(model, valid_loader, criterion, DEVICE) + + scheduler.step() # Update learning rate + + print(f" Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}") + print(f" Valid Loss: {valid_loss:.4f}, Valid Acc: {valid_acc:.4f}") + + # Simpan riwayat + history['train_loss'].append(train_loss) + history['train_acc'].append(train_acc) + history['valid_loss'].append(valid_loss) + history['valid_acc'].append(valid_acc) + + # Simpan model dengan akurasi validasi terbaik (Early stopping sederhana) + if valid_acc > best_valid_acc: + print(f"Validasi akurasi meningkat dari {best_valid_acc:.4f} ke {valid_acc:.4f}. Menyimpan model...") + save_model(epoch, model, optimizer_finetune, criterion, f"{OUTPUT_DIR}/{MODEL_NAME}") + best_valid_acc = valid_acc + + # F. 
Evaluasi & Reporting + # Simpan plot akurasi dan loss + save_plots( + history['train_acc'], history['valid_acc'], + history['train_loss'], history['valid_loss'], + OUTPUT_DIR + ) + + print("\n--- Selesai ---") + print(f"Model terbaik disimpan di {OUTPUT_DIR}/{MODEL_NAME}") \ No newline at end of file diff --git a/.history/src/train_20250924202509.py b/.history/src/train_20250924202509.py new file mode 100644 index 0000000000000000000000000000000000000000..b18aa9631ace323fa6ae5186882d0844a2f61da9 --- /dev/null +++ b/.history/src/train_20250924202509.py @@ -0,0 +1,188 @@ +# src/train.py + +import torch +import torch.nn as nn +import torch.optim as optim +from tqdm import tqdm +import numpy as np +import seaborn as sns +from sklearn.metrics import confusion_matrix +from sklearn.metrics import classification_report + +# Impor dari file lain dalam proyek +from model import create_model +from dataset import get_dataloaders +from utils import save_model, save_plots + +# --- 1. KONFIGURASI & HYPERPARAMETERS --- +# Sebagian besar diambil dari rekomendasi metodologi Anda +DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu' +DATA_DIR = 'data' # Pastikan folder 'data' hasil prepare_dataset.py ada di sini +OUTPUT_DIR = 'outputs/models' # Folder untuk menyimpan model dan plot +IMAGE_SIZE = 224 # Ukuran input untuk ViT (bisa 224x224 atau 384x384) +BATCH_SIZE = 16 +EPOCHS = 50 # Total epoch, tapi kita akan pakai Early Stopping +LEARNING_RATE_HEAD = 1e-3 # LR untuk melatih classifier head saja +LEARNING_RATE_FINETUNE = 3e-5 # LR untuk fine-tuning seluruh model +WEIGHT_DECAY = 0.01 +MODEL_NAME = 'vit_mae_cataract_classifier.pth' +NUM_CLASSES = 4 # Sesuaikan dengan jumlah kelas katarak Anda + +# --- 2. FUNGSI UNTUK MENGATASI CLASS IMBALANCE --- +def calculate_class_weights(dataset): + """ + Menghitung bobot untuk setiap kelas berdasarkan frekuensinya (1 / sqrt(freq)). + Ini adalah implementasi dari "Weighted loss" di metodologi Anda. 
+ """ + # Hitung jumlah sampel per kelas + class_counts = np.bincount(dataset.targets) + # Hitung bobot: 1 / sqrt(frekuensi) + class_weights = 1. / np.sqrt(class_counts) + # Normalisasi bobot + class_weights = class_weights / np.sum(class_weights) + return torch.tensor(class_weights, dtype=torch.float).to(DEVICE) + +# --- 3. FUNGSI TRAINING & VALIDASI --- +def train_one_epoch(model, dataloader, optimizer, criterion, device): + model.train() + running_loss = 0.0 + correct_predictions = 0 + total_samples = 0 + + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Training"): + images, labels = images.to(device), labels.to(device) + + optimizer.zero_grad() + outputs = model(images) + loss = criterion(outputs, labels) + + loss.backward() + optimizer.step() + + running_loss += loss.item() * images.size(0) + _, preds = torch.max(outputs, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + return epoch_loss, epoch_acc + +def validate_one_epoch(model, dataloader, criterion, device): + model.eval() + running_loss = 0.0 + correct_predictions = 0 + total_samples = 0 + + all_preds = [] + all_labels = [] + + with torch.no_grad(): + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Validating"): + images, labels = images.to(device), labels.to(device) + + outputs = model(images) + loss = criterion(outputs, labels) + + running_loss += loss.item() * images.size(0) + _, preds = torch.max(outputs, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + all_preds.extend(preds.cpu().numpy()) + all_labels.extend(labels.cpu().numpy()) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + + # Cetak laporan klasifikasi (precision, recall, f1-score per kelas) + print("\n--- Laporan Klasifikasi 
Validasi ---") + print(classification_report(all_labels, all_preds, target_names=[str(i) for i in range(NUM_CLASSES)])) + + return epoch_loss, epoch_acc + +# --- 4. SCRIPT UTAMA --- +if __name__ == '__main__': + # A. Persiapan & Split Data + # Kita asumsikan ini sudah dilakukan oleh prepare_dataset.py + # get_dataloaders akan mengambil data dari folder 'data/train' dan 'data/valid' + train_loader, valid_loader, classes = get_dataloaders(DATA_DIR, BATCH_SIZE, IMAGE_SIZE) + + # B. Arsitektur & Pretraining + # Menggunakan ViT pretrained (MAE) dari model.py + model = create_model(num_classes=NUM_CLASSES).to(DEVICE) + + # C. Menangani Imbalance + # Hitung bobot kelas dari dataset training + class_weights = calculate_class_weights(train_loader.dataset) + print(f"Class weights untuk mengatasi imbalance: {class_weights}") + # Gunakan Weighted Cross-Entropy Loss + criterion = nn.CrossEntropyLoss(weight=class_weights) + + # D. Strategi Fine-tune + # --- TAHAP 1: FREEZE BACKBONE, LATIH HEAD --- + print("\n--- TAHAP 1: Melatih Classifier Head ---") + # Bekukan semua layer kecuali head + for param in model.parameters(): + param.requires_grad = False + for param in model.head.parameters(): + param.requires_grad = True + + optimizer_head = optim.AdamW(model.head.parameters(), lr=LEARNING_RATE_HEAD, weight_decay=WEIGHT_DECAY) + + # Latih head selama beberapa epoch + for epoch in range(5): # Misal 5 epoch untuk head + print(f"Epoch Head {epoch+1}/5") + train_one_epoch(model, train_loader, optimizer_head, criterion, DEVICE) + validate_one_epoch(model, valid_loader, criterion, DEVICE) + + # --- TAHAP 2: UNFREEZE & FINE-TUNE SELURUH MODEL --- + print("\n--- TAHAP 2: Fine-tuning Seluruh Model ---") + # Cairkan semua layer + for param in model.parameters(): + param.requires_grad = True + + optimizer_finetune = optim.AdamW(model.parameters(), lr=LEARNING_RATE_FINETUNE, weight_decay=WEIGHT_DECAY) + # Gunakan scheduler untuk menyesuaikan learning rate + scheduler = 
optim.lr_scheduler.CosineAnnealingLR(optimizer_finetune, T_max=EPOCHS, eta_min=1e-7) + + # E. Loop Training Utama dengan Regularisasi & Hyperparams + history = { + 'train_loss': [], 'train_acc': [], + 'valid_loss': [], 'valid_acc': [] + } + best_valid_acc = 0.0 + + for epoch in range(EPOCHS): + print(f"Epoch {epoch+1}/{EPOCHS}") + + train_loss, train_acc = train_one_epoch(model, train_loader, optimizer_finetune, criterion, DEVICE) + valid_loss, valid_acc = validate_one_epoch(model, valid_loader, criterion, DEVICE) + + scheduler.step() # Update learning rate + + print(f" Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}") + print(f" Valid Loss: {valid_loss:.4f}, Valid Acc: {valid_acc:.4f}") + + # Simpan riwayat + history['train_loss'].append(train_loss) + history['train_acc'].append(train_acc) + history['valid_loss'].append(valid_loss) + history['valid_acc'].append(valid_acc) + + # Simpan model dengan akurasi validasi terbaik (Early stopping sederhana) + if valid_acc > best_valid_acc: + print(f"Validasi akurasi meningkat dari {best_valid_acc:.4f} ke {valid_acc:.4f}. Menyimpan model...") + save_model(epoch, model, optimizer_finetune, criterion, f"{OUTPUT_DIR}/{MODEL_NAME}") + best_valid_acc = valid_acc + + # F. 
Evaluasi & Reporting + # Simpan plot akurasi dan loss + save_plots( + history['train_acc'], history['valid_acc'], + history['train_loss'], history['valid_loss'], + OUTPUT_DIR + ) + + print("\n--- Selesai ---") + print(f"Model terbaik disimpan di {OUTPUT_DIR}/{MODEL_NAME}") \ No newline at end of file diff --git a/.history/src/train_20250924202747.py b/.history/src/train_20250924202747.py new file mode 100644 index 0000000000000000000000000000000000000000..6aed1980b35a32bd831b4bc5d42c9220332d2096 --- /dev/null +++ b/.history/src/train_20250924202747.py @@ -0,0 +1,188 @@ +# src/train.py + +import torch +import torch.nn as nn +import torch.optim as optim +from tqdm import tqdm +import numpy as np +import seaborn as sns +from sklearn.metrics import confusion_matrix +from sklearn.metrics import classification_report + +# Impor dari file lain dalam proyek +from model import create_model +from dataset import get_dataloaders +from utils import save_model, save_plots + +# --- 1. KONFIGURASI & HYPERPARAMETERS --- +# Sebagian besar diambil dari rekomendasi metodologi Anda +DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu' +DATA_DIR = 'data' # Pastikan folder 'data' hasil prepare_dataset.py ada di sini +OUTPUT_DIR = 'outputs/models' # Folder untuk menyimpan model dan plot +IMAGE_SIZE = 224 # Ukuran input untuk ViT (bisa 224x224 atau 384x384) +BATCH_SIZE = +EPOCHS = 50 # Total epoch, tapi kita akan pakai Early Stopping +LEARNING_RATE_HEAD = 1e-3 # LR untuk melatih classifier head saja +LEARNING_RATE_FINETUNE = 3e-5 # LR untuk fine-tuning seluruh model +WEIGHT_DECAY = 0.01 +MODEL_NAME = 'vit_mae_cataract_classifier.pth' +NUM_CLASSES = 4 # Sesuaikan dengan jumlah kelas katarak Anda + +# --- 2. FUNGSI UNTUK MENGATASI CLASS IMBALANCE --- +def calculate_class_weights(dataset): + """ + Menghitung bobot untuk setiap kelas berdasarkan frekuensinya (1 / sqrt(freq)). + Ini adalah implementasi dari "Weighted loss" di metodologi Anda. 
+ """ + # Hitung jumlah sampel per kelas + class_counts = np.bincount(dataset.targets) + # Hitung bobot: 1 / sqrt(frekuensi) + class_weights = 1. / np.sqrt(class_counts) + # Normalisasi bobot + class_weights = class_weights / np.sum(class_weights) + return torch.tensor(class_weights, dtype=torch.float).to(DEVICE) + +# --- 3. FUNGSI TRAINING & VALIDASI --- +def train_one_epoch(model, dataloader, optimizer, criterion, device): + model.train() + running_loss = 0.0 + correct_predictions = 0 + total_samples = 0 + + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Training"): + images, labels = images.to(device), labels.to(device) + + optimizer.zero_grad() + outputs = model(images) + loss = criterion(outputs, labels) + + loss.backward() + optimizer.step() + + running_loss += loss.item() * images.size(0) + _, preds = torch.max(outputs, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + return epoch_loss, epoch_acc + +def validate_one_epoch(model, dataloader, criterion, device): + model.eval() + running_loss = 0.0 + correct_predictions = 0 + total_samples = 0 + + all_preds = [] + all_labels = [] + + with torch.no_grad(): + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Validating"): + images, labels = images.to(device), labels.to(device) + + outputs = model(images) + loss = criterion(outputs, labels) + + running_loss += loss.item() * images.size(0) + _, preds = torch.max(outputs, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + all_preds.extend(preds.cpu().numpy()) + all_labels.extend(labels.cpu().numpy()) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + + # Cetak laporan klasifikasi (precision, recall, f1-score per kelas) + print("\n--- Laporan Klasifikasi 
Validasi ---") + print(classification_report(all_labels, all_preds, target_names=[str(i) for i in range(NUM_CLASSES)])) + + return epoch_loss, epoch_acc + +# --- 4. SCRIPT UTAMA --- +if __name__ == '__main__': + # A. Persiapan & Split Data + # Kita asumsikan ini sudah dilakukan oleh prepare_dataset.py + # get_dataloaders akan mengambil data dari folder 'data/train' dan 'data/valid' + train_loader, valid_loader, classes = get_dataloaders(DATA_DIR, BATCH_SIZE, IMAGE_SIZE) + + # B. Arsitektur & Pretraining + # Menggunakan ViT pretrained (MAE) dari model.py + model = create_model(num_classes=NUM_CLASSES).to(DEVICE) + + # C. Menangani Imbalance + # Hitung bobot kelas dari dataset training + class_weights = calculate_class_weights(train_loader.dataset) + print(f"Class weights untuk mengatasi imbalance: {class_weights}") + # Gunakan Weighted Cross-Entropy Loss + criterion = nn.CrossEntropyLoss(weight=class_weights) + + # D. Strategi Fine-tune + # --- TAHAP 1: FREEZE BACKBONE, LATIH HEAD --- + print("\n--- TAHAP 1: Melatih Classifier Head ---") + # Bekukan semua layer kecuali head + for param in model.parameters(): + param.requires_grad = False + for param in model.head.parameters(): + param.requires_grad = True + + optimizer_head = optim.AdamW(model.head.parameters(), lr=LEARNING_RATE_HEAD, weight_decay=WEIGHT_DECAY) + + # Latih head selama beberapa epoch + for epoch in range(5): # Misal 5 epoch untuk head + print(f"Epoch Head {epoch+1}/5") + train_one_epoch(model, train_loader, optimizer_head, criterion, DEVICE) + validate_one_epoch(model, valid_loader, criterion, DEVICE) + + # --- TAHAP 2: UNFREEZE & FINE-TUNE SELURUH MODEL --- + print("\n--- TAHAP 2: Fine-tuning Seluruh Model ---") + # Cairkan semua layer + for param in model.parameters(): + param.requires_grad = True + + optimizer_finetune = optim.AdamW(model.parameters(), lr=LEARNING_RATE_FINETUNE, weight_decay=WEIGHT_DECAY) + # Gunakan scheduler untuk menyesuaikan learning rate + scheduler = 
optim.lr_scheduler.CosineAnnealingLR(optimizer_finetune, T_max=EPOCHS, eta_min=1e-7) + + # E. Loop Training Utama dengan Regularisasi & Hyperparams + history = { + 'train_loss': [], 'train_acc': [], + 'valid_loss': [], 'valid_acc': [] + } + best_valid_acc = 0.0 + + for epoch in range(EPOCHS): + print(f"Epoch {epoch+1}/{EPOCHS}") + + train_loss, train_acc = train_one_epoch(model, train_loader, optimizer_finetune, criterion, DEVICE) + valid_loss, valid_acc = validate_one_epoch(model, valid_loader, criterion, DEVICE) + + scheduler.step() # Update learning rate + + print(f" Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}") + print(f" Valid Loss: {valid_loss:.4f}, Valid Acc: {valid_acc:.4f}") + + # Simpan riwayat + history['train_loss'].append(train_loss) + history['train_acc'].append(train_acc) + history['valid_loss'].append(valid_loss) + history['valid_acc'].append(valid_acc) + + # Simpan model dengan akurasi validasi terbaik (Early stopping sederhana) + if valid_acc > best_valid_acc: + print(f"Validasi akurasi meningkat dari {best_valid_acc:.4f} ke {valid_acc:.4f}. Menyimpan model...") + save_model(epoch, model, optimizer_finetune, criterion, f"{OUTPUT_DIR}/{MODEL_NAME}") + best_valid_acc = valid_acc + + # F. 
Evaluasi & Reporting + # Simpan plot akurasi dan loss + save_plots( + history['train_acc'], history['valid_acc'], + history['train_loss'], history['valid_loss'], + OUTPUT_DIR + ) + + print("\n--- Selesai ---") + print(f"Model terbaik disimpan di {OUTPUT_DIR}/{MODEL_NAME}") \ No newline at end of file diff --git a/.history/src/train_20250924202748.py b/.history/src/train_20250924202748.py new file mode 100644 index 0000000000000000000000000000000000000000..7b2e7052019512ec0c8eab3cae7ada860f87970a --- /dev/null +++ b/.history/src/train_20250924202748.py @@ -0,0 +1,188 @@ +# src/train.py + +import torch +import torch.nn as nn +import torch.optim as optim +from tqdm import tqdm +import numpy as np +import seaborn as sns +from sklearn.metrics import confusion_matrix +from sklearn.metrics import classification_report + +# Impor dari file lain dalam proyek +from model import create_model +from dataset import get_dataloaders +from utils import save_model, save_plots + +# --- 1. KONFIGURASI & HYPERPARAMETERS --- +# Sebagian besar diambil dari rekomendasi metodologi Anda +DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu' +DATA_DIR = 'data' # Pastikan folder 'data' hasil prepare_dataset.py ada di sini +OUTPUT_DIR = 'outputs/models' # Folder untuk menyimpan model dan plot +IMAGE_SIZE = 224 # Ukuran input untuk ViT (bisa 224x224 atau 384x384) +BATCH_SIZE = 32 +EPOCHS = 50 # Total epoch, tapi kita akan pakai Early Stopping +LEARNING_RATE_HEAD = 1e-3 # LR untuk melatih classifier head saja +LEARNING_RATE_FINETUNE = 3e-5 # LR untuk fine-tuning seluruh model +WEIGHT_DECAY = 0.01 +MODEL_NAME = 'vit_mae_cataract_classifier.pth' +NUM_CLASSES = 4 # Sesuaikan dengan jumlah kelas katarak Anda + +# --- 2. FUNGSI UNTUK MENGATASI CLASS IMBALANCE --- +def calculate_class_weights(dataset): + """ + Menghitung bobot untuk setiap kelas berdasarkan frekuensinya (1 / sqrt(freq)). + Ini adalah implementasi dari "Weighted loss" di metodologi Anda. 
+ """ + # Hitung jumlah sampel per kelas + class_counts = np.bincount(dataset.targets) + # Hitung bobot: 1 / sqrt(frekuensi) + class_weights = 1. / np.sqrt(class_counts) + # Normalisasi bobot + class_weights = class_weights / np.sum(class_weights) + return torch.tensor(class_weights, dtype=torch.float).to(DEVICE) + +# --- 3. FUNGSI TRAINING & VALIDASI --- +def train_one_epoch(model, dataloader, optimizer, criterion, device): + model.train() + running_loss = 0.0 + correct_predictions = 0 + total_samples = 0 + + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Training"): + images, labels = images.to(device), labels.to(device) + + optimizer.zero_grad() + outputs = model(images) + loss = criterion(outputs, labels) + + loss.backward() + optimizer.step() + + running_loss += loss.item() * images.size(0) + _, preds = torch.max(outputs, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + return epoch_loss, epoch_acc + +def validate_one_epoch(model, dataloader, criterion, device): + model.eval() + running_loss = 0.0 + correct_predictions = 0 + total_samples = 0 + + all_preds = [] + all_labels = [] + + with torch.no_grad(): + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Validating"): + images, labels = images.to(device), labels.to(device) + + outputs = model(images) + loss = criterion(outputs, labels) + + running_loss += loss.item() * images.size(0) + _, preds = torch.max(outputs, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + all_preds.extend(preds.cpu().numpy()) + all_labels.extend(labels.cpu().numpy()) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + + # Cetak laporan klasifikasi (precision, recall, f1-score per kelas) + print("\n--- Laporan Klasifikasi 
Validasi ---") + print(classification_report(all_labels, all_preds, target_names=[str(i) for i in range(NUM_CLASSES)])) + + return epoch_loss, epoch_acc + +# --- 4. SCRIPT UTAMA --- +if __name__ == '__main__': + # A. Persiapan & Split Data + # Kita asumsikan ini sudah dilakukan oleh prepare_dataset.py + # get_dataloaders akan mengambil data dari folder 'data/train' dan 'data/valid' + train_loader, valid_loader, classes = get_dataloaders(DATA_DIR, BATCH_SIZE, IMAGE_SIZE) + + # B. Arsitektur & Pretraining + # Menggunakan ViT pretrained (MAE) dari model.py + model = create_model(num_classes=NUM_CLASSES).to(DEVICE) + + # C. Menangani Imbalance + # Hitung bobot kelas dari dataset training + class_weights = calculate_class_weights(train_loader.dataset) + print(f"Class weights untuk mengatasi imbalance: {class_weights}") + # Gunakan Weighted Cross-Entropy Loss + criterion = nn.CrossEntropyLoss(weight=class_weights) + + # D. Strategi Fine-tune + # --- TAHAP 1: FREEZE BACKBONE, LATIH HEAD --- + print("\n--- TAHAP 1: Melatih Classifier Head ---") + # Bekukan semua layer kecuali head + for param in model.parameters(): + param.requires_grad = False + for param in model.head.parameters(): + param.requires_grad = True + + optimizer_head = optim.AdamW(model.head.parameters(), lr=LEARNING_RATE_HEAD, weight_decay=WEIGHT_DECAY) + + # Latih head selama beberapa epoch + for epoch in range(5): # Misal 5 epoch untuk head + print(f"Epoch Head {epoch+1}/5") + train_one_epoch(model, train_loader, optimizer_head, criterion, DEVICE) + validate_one_epoch(model, valid_loader, criterion, DEVICE) + + # --- TAHAP 2: UNFREEZE & FINE-TUNE SELURUH MODEL --- + print("\n--- TAHAP 2: Fine-tuning Seluruh Model ---") + # Cairkan semua layer + for param in model.parameters(): + param.requires_grad = True + + optimizer_finetune = optim.AdamW(model.parameters(), lr=LEARNING_RATE_FINETUNE, weight_decay=WEIGHT_DECAY) + # Gunakan scheduler untuk menyesuaikan learning rate + scheduler = 
optim.lr_scheduler.CosineAnnealingLR(optimizer_finetune, T_max=EPOCHS, eta_min=1e-7) + + # E. Loop Training Utama dengan Regularisasi & Hyperparams + history = { + 'train_loss': [], 'train_acc': [], + 'valid_loss': [], 'valid_acc': [] + } + best_valid_acc = 0.0 + + for epoch in range(EPOCHS): + print(f"Epoch {epoch+1}/{EPOCHS}") + + train_loss, train_acc = train_one_epoch(model, train_loader, optimizer_finetune, criterion, DEVICE) + valid_loss, valid_acc = validate_one_epoch(model, valid_loader, criterion, DEVICE) + + scheduler.step() # Update learning rate + + print(f" Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}") + print(f" Valid Loss: {valid_loss:.4f}, Valid Acc: {valid_acc:.4f}") + + # Simpan riwayat + history['train_loss'].append(train_loss) + history['train_acc'].append(train_acc) + history['valid_loss'].append(valid_loss) + history['valid_acc'].append(valid_acc) + + # Simpan model dengan akurasi validasi terbaik (Early stopping sederhana) + if valid_acc > best_valid_acc: + print(f"Validasi akurasi meningkat dari {best_valid_acc:.4f} ke {valid_acc:.4f}. Menyimpan model...") + save_model(epoch, model, optimizer_finetune, criterion, f"{OUTPUT_DIR}/{MODEL_NAME}") + best_valid_acc = valid_acc + + # F. 
Evaluasi & Reporting + # Simpan plot akurasi dan loss + save_plots( + history['train_acc'], history['valid_acc'], + history['train_loss'], history['valid_loss'], + OUTPUT_DIR + ) + + print("\n--- Selesai ---") + print(f"Model terbaik disimpan di {OUTPUT_DIR}/{MODEL_NAME}") \ No newline at end of file diff --git a/.history/src/train_20250924202751.py b/.history/src/train_20250924202751.py new file mode 100644 index 0000000000000000000000000000000000000000..22a10d3773f4337d711d914054f32ffb36885439 --- /dev/null +++ b/.history/src/train_20250924202751.py @@ -0,0 +1,188 @@ +# src/train.py + +import torch +import torch.nn as nn +import torch.optim as optim +from tqdm import tqdm +import numpy as np +import seaborn as sns +from sklearn.metrics import confusion_matrix +from sklearn.metrics import classification_report + +# Impor dari file lain dalam proyek +from model import create_model +from dataset import get_dataloaders +from utils import save_model, save_plots + +# --- 1. KONFIGURASI & HYPERPARAMETERS --- +# Sebagian besar diambil dari rekomendasi metodologi Anda +DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu' +DATA_DIR = 'data' # Pastikan folder 'data' hasil prepare_dataset.py ada di sini +OUTPUT_DIR = 'outputs/models' # Folder untuk menyimpan model dan plot +IMAGE_SIZE = 224 # Ukuran input untuk ViT (bisa 224x224 atau 384x384) +BATCH_SIZE = 32 +EPOCHS = 30 # Total epoch, tapi kita akan pakai Early Stopping +LEARNING_RATE_HEAD = 1e-3 # LR untuk melatih classifier head saja +LEARNING_RATE_FINETUNE = 3e-5 # LR untuk fine-tuning seluruh model +WEIGHT_DECAY = 0.01 +MODEL_NAME = 'vit_mae_cataract_classifier.pth' +NUM_CLASSES = 4 # Sesuaikan dengan jumlah kelas katarak Anda + +# --- 2. FUNGSI UNTUK MENGATASI CLASS IMBALANCE --- +def calculate_class_weights(dataset): + """ + Menghitung bobot untuk setiap kelas berdasarkan frekuensinya (1 / sqrt(freq)). + Ini adalah implementasi dari "Weighted loss" di metodologi Anda. 
+ """ + # Hitung jumlah sampel per kelas + class_counts = np.bincount(dataset.targets) + # Hitung bobot: 1 / sqrt(frekuensi) + class_weights = 1. / np.sqrt(class_counts) + # Normalisasi bobot + class_weights = class_weights / np.sum(class_weights) + return torch.tensor(class_weights, dtype=torch.float).to(DEVICE) + +# --- 3. FUNGSI TRAINING & VALIDASI --- +def train_one_epoch(model, dataloader, optimizer, criterion, device): + model.train() + running_loss = 0.0 + correct_predictions = 0 + total_samples = 0 + + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Training"): + images, labels = images.to(device), labels.to(device) + + optimizer.zero_grad() + outputs = model(images) + loss = criterion(outputs, labels) + + loss.backward() + optimizer.step() + + running_loss += loss.item() * images.size(0) + _, preds = torch.max(outputs, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + return epoch_loss, epoch_acc + +def validate_one_epoch(model, dataloader, criterion, device): + model.eval() + running_loss = 0.0 + correct_predictions = 0 + total_samples = 0 + + all_preds = [] + all_labels = [] + + with torch.no_grad(): + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Validating"): + images, labels = images.to(device), labels.to(device) + + outputs = model(images) + loss = criterion(outputs, labels) + + running_loss += loss.item() * images.size(0) + _, preds = torch.max(outputs, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + all_preds.extend(preds.cpu().numpy()) + all_labels.extend(labels.cpu().numpy()) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + + # Cetak laporan klasifikasi (precision, recall, f1-score per kelas) + print("\n--- Laporan Klasifikasi 
Validasi ---") + print(classification_report(all_labels, all_preds, target_names=[str(i) for i in range(NUM_CLASSES)])) + + return epoch_loss, epoch_acc + +# --- 4. SCRIPT UTAMA --- +if __name__ == '__main__': + # A. Persiapan & Split Data + # Kita asumsikan ini sudah dilakukan oleh prepare_dataset.py + # get_dataloaders akan mengambil data dari folder 'data/train' dan 'data/valid' + train_loader, valid_loader, classes = get_dataloaders(DATA_DIR, BATCH_SIZE, IMAGE_SIZE) + + # B. Arsitektur & Pretraining + # Menggunakan ViT pretrained (MAE) dari model.py + model = create_model(num_classes=NUM_CLASSES).to(DEVICE) + + # C. Menangani Imbalance + # Hitung bobot kelas dari dataset training + class_weights = calculate_class_weights(train_loader.dataset) + print(f"Class weights untuk mengatasi imbalance: {class_weights}") + # Gunakan Weighted Cross-Entropy Loss + criterion = nn.CrossEntropyLoss(weight=class_weights) + + # D. Strategi Fine-tune + # --- TAHAP 1: FREEZE BACKBONE, LATIH HEAD --- + print("\n--- TAHAP 1: Melatih Classifier Head ---") + # Bekukan semua layer kecuali head + for param in model.parameters(): + param.requires_grad = False + for param in model.head.parameters(): + param.requires_grad = True + + optimizer_head = optim.AdamW(model.head.parameters(), lr=LEARNING_RATE_HEAD, weight_decay=WEIGHT_DECAY) + + # Latih head selama beberapa epoch + for epoch in range(5): # Misal 5 epoch untuk head + print(f"Epoch Head {epoch+1}/5") + train_one_epoch(model, train_loader, optimizer_head, criterion, DEVICE) + validate_one_epoch(model, valid_loader, criterion, DEVICE) + + # --- TAHAP 2: UNFREEZE & FINE-TUNE SELURUH MODEL --- + print("\n--- TAHAP 2: Fine-tuning Seluruh Model ---") + # Cairkan semua layer + for param in model.parameters(): + param.requires_grad = True + + optimizer_finetune = optim.AdamW(model.parameters(), lr=LEARNING_RATE_FINETUNE, weight_decay=WEIGHT_DECAY) + # Gunakan scheduler untuk menyesuaikan learning rate + scheduler = 
optim.lr_scheduler.CosineAnnealingLR(optimizer_finetune, T_max=EPOCHS, eta_min=1e-7) + + # E. Loop Training Utama dengan Regularisasi & Hyperparams + history = { + 'train_loss': [], 'train_acc': [], + 'valid_loss': [], 'valid_acc': [] + } + best_valid_acc = 0.0 + + for epoch in range(EPOCHS): + print(f"Epoch {epoch+1}/{EPOCHS}") + + train_loss, train_acc = train_one_epoch(model, train_loader, optimizer_finetune, criterion, DEVICE) + valid_loss, valid_acc = validate_one_epoch(model, valid_loader, criterion, DEVICE) + + scheduler.step() # Update learning rate + + print(f" Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}") + print(f" Valid Loss: {valid_loss:.4f}, Valid Acc: {valid_acc:.4f}") + + # Simpan riwayat + history['train_loss'].append(train_loss) + history['train_acc'].append(train_acc) + history['valid_loss'].append(valid_loss) + history['valid_acc'].append(valid_acc) + + # Simpan model dengan akurasi validasi terbaik (Early stopping sederhana) + if valid_acc > best_valid_acc: + print(f"Validasi akurasi meningkat dari {best_valid_acc:.4f} ke {valid_acc:.4f}. Menyimpan model...") + save_model(epoch, model, optimizer_finetune, criterion, f"{OUTPUT_DIR}/{MODEL_NAME}") + best_valid_acc = valid_acc + + # F. 
Evaluasi & Reporting + # Simpan plot akurasi dan loss + save_plots( + history['train_acc'], history['valid_acc'], + history['train_loss'], history['valid_loss'], + OUTPUT_DIR + ) + + print("\n--- Selesai ---") + print(f"Model terbaik disimpan di {OUTPUT_DIR}/{MODEL_NAME}") \ No newline at end of file diff --git a/.history/src/train_20250924202821.py b/.history/src/train_20250924202821.py new file mode 100644 index 0000000000000000000000000000000000000000..6cbb3c571eb2e126d36ef15f3ec6d751651a7a77 --- /dev/null +++ b/.history/src/train_20250924202821.py @@ -0,0 +1,188 @@ +# src/train.py + +import torch +import torch.nn as nn +import torch.optim as optim +from tqdm import tqdm +import numpy as np +import seaborn as sns +from sklearn.metrics import confusion_matrix +from sklearn.metrics import classification_report + +# Impor dari file lain dalam proyek +from model import create_model +from dataset import get_dataloaders +from utils import save_model, save_plots + +# --- 1. KONFIGURASI & HYPERPARAMETERS --- +# Sebagian besar diambil dari rekomendasi metodologi Anda +DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu' +DATA_DIR = 'data' # Pastikan folder 'data' hasil prepare_dataset.py ada di sini +OUTPUT_DIR = 'outputs/models' # Folder untuk menyimpan model dan plot +IMAGE_SIZE = 224 # Ukuran input untuk ViT (bisa 224x224 atau 384x384) +BATCH_SIZE = 32 +EPOCHS = 30 # Total epoch, tapi kita akan pakai Early Stopping +LEARNING_RATE_HEAD = 1e-3 # LR untuk melatih classifier head saja +LEARNING_RATE_FINETUNE = 5e-5 # LR untuk fine-tuning seluruh model +WEIGHT_DECAY = 0.01 +MODEL_NAME = 'vit_mae_cataract_classifier.pth' +NUM_CLASSES = 4 # Sesuaikan dengan jumlah kelas katarak Anda + +# --- 2. FUNGSI UNTUK MENGATASI CLASS IMBALANCE --- +def calculate_class_weights(dataset): + """ + Menghitung bobot untuk setiap kelas berdasarkan frekuensinya (1 / sqrt(freq)). + Ini adalah implementasi dari "Weighted loss" di metodologi Anda. 
+ """ + # Hitung jumlah sampel per kelas + class_counts = np.bincount(dataset.targets) + # Hitung bobot: 1 / sqrt(frekuensi) + class_weights = 1. / np.sqrt(class_counts) + # Normalisasi bobot + class_weights = class_weights / np.sum(class_weights) + return torch.tensor(class_weights, dtype=torch.float).to(DEVICE) + +# --- 3. FUNGSI TRAINING & VALIDASI --- +def train_one_epoch(model, dataloader, optimizer, criterion, device): + model.train() + running_loss = 0.0 + correct_predictions = 0 + total_samples = 0 + + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Training"): + images, labels = images.to(device), labels.to(device) + + optimizer.zero_grad() + outputs = model(images) + loss = criterion(outputs, labels) + + loss.backward() + optimizer.step() + + running_loss += loss.item() * images.size(0) + _, preds = torch.max(outputs, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + return epoch_loss, epoch_acc + +def validate_one_epoch(model, dataloader, criterion, device): + model.eval() + running_loss = 0.0 + correct_predictions = 0 + total_samples = 0 + + all_preds = [] + all_labels = [] + + with torch.no_grad(): + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Validating"): + images, labels = images.to(device), labels.to(device) + + outputs = model(images) + loss = criterion(outputs, labels) + + running_loss += loss.item() * images.size(0) + _, preds = torch.max(outputs, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + all_preds.extend(preds.cpu().numpy()) + all_labels.extend(labels.cpu().numpy()) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + + # Cetak laporan klasifikasi (precision, recall, f1-score per kelas) + print("\n--- Laporan Klasifikasi 
Validasi ---") + print(classification_report(all_labels, all_preds, target_names=[str(i) for i in range(NUM_CLASSES)])) + + return epoch_loss, epoch_acc + +# --- 4. SCRIPT UTAMA --- +if __name__ == '__main__': + # A. Persiapan & Split Data + # Kita asumsikan ini sudah dilakukan oleh prepare_dataset.py + # get_dataloaders akan mengambil data dari folder 'data/train' dan 'data/valid' + train_loader, valid_loader, classes = get_dataloaders(DATA_DIR, BATCH_SIZE, IMAGE_SIZE) + + # B. Arsitektur & Pretraining + # Menggunakan ViT pretrained (MAE) dari model.py + model = create_model(num_classes=NUM_CLASSES).to(DEVICE) + + # C. Menangani Imbalance + # Hitung bobot kelas dari dataset training + class_weights = calculate_class_weights(train_loader.dataset) + print(f"Class weights untuk mengatasi imbalance: {class_weights}") + # Gunakan Weighted Cross-Entropy Loss + criterion = nn.CrossEntropyLoss(weight=class_weights) + + # D. Strategi Fine-tune + # --- TAHAP 1: FREEZE BACKBONE, LATIH HEAD --- + print("\n--- TAHAP 1: Melatih Classifier Head ---") + # Bekukan semua layer kecuali head + for param in model.parameters(): + param.requires_grad = False + for param in model.head.parameters(): + param.requires_grad = True + + optimizer_head = optim.AdamW(model.head.parameters(), lr=LEARNING_RATE_HEAD, weight_decay=WEIGHT_DECAY) + + # Latih head selama beberapa epoch + for epoch in range(5): # Misal 5 epoch untuk head + print(f"Epoch Head {epoch+1}/5") + train_one_epoch(model, train_loader, optimizer_head, criterion, DEVICE) + validate_one_epoch(model, valid_loader, criterion, DEVICE) + + # --- TAHAP 2: UNFREEZE & FINE-TUNE SELURUH MODEL --- + print("\n--- TAHAP 2: Fine-tuning Seluruh Model ---") + # Cairkan semua layer + for param in model.parameters(): + param.requires_grad = True + + optimizer_finetune = optim.AdamW(model.parameters(), lr=LEARNING_RATE_FINETUNE, weight_decay=WEIGHT_DECAY) + # Gunakan scheduler untuk menyesuaikan learning rate + scheduler = 
optim.lr_scheduler.CosineAnnealingLR(optimizer_finetune, T_max=EPOCHS, eta_min=1e-7) + + # E. Loop Training Utama dengan Regularisasi & Hyperparams + history = { + 'train_loss': [], 'train_acc': [], + 'valid_loss': [], 'valid_acc': [] + } + best_valid_acc = 0.0 + + for epoch in range(EPOCHS): + print(f"Epoch {epoch+1}/{EPOCHS}") + + train_loss, train_acc = train_one_epoch(model, train_loader, optimizer_finetune, criterion, DEVICE) + valid_loss, valid_acc = validate_one_epoch(model, valid_loader, criterion, DEVICE) + + scheduler.step() # Update learning rate + + print(f" Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}") + print(f" Valid Loss: {valid_loss:.4f}, Valid Acc: {valid_acc:.4f}") + + # Simpan riwayat + history['train_loss'].append(train_loss) + history['train_acc'].append(train_acc) + history['valid_loss'].append(valid_loss) + history['valid_acc'].append(valid_acc) + + # Simpan model dengan akurasi validasi terbaik (Early stopping sederhana) + if valid_acc > best_valid_acc: + print(f"Validasi akurasi meningkat dari {best_valid_acc:.4f} ke {valid_acc:.4f}. Menyimpan model...") + save_model(epoch, model, optimizer_finetune, criterion, f"{OUTPUT_DIR}/{MODEL_NAME}") + best_valid_acc = valid_acc + + # F. 
Evaluasi & Reporting + # Simpan plot akurasi dan loss + save_plots( + history['train_acc'], history['valid_acc'], + history['train_loss'], history['valid_loss'], + OUTPUT_DIR + ) + + print("\n--- Selesai ---") + print(f"Model terbaik disimpan di {OUTPUT_DIR}/{MODEL_NAME}") \ No newline at end of file diff --git a/.history/src/train_20250924202824.py b/.history/src/train_20250924202824.py new file mode 100644 index 0000000000000000000000000000000000000000..6cbb3c571eb2e126d36ef15f3ec6d751651a7a77 --- /dev/null +++ b/.history/src/train_20250924202824.py @@ -0,0 +1,188 @@ +# src/train.py + +import torch +import torch.nn as nn +import torch.optim as optim +from tqdm import tqdm +import numpy as np +import seaborn as sns +from sklearn.metrics import confusion_matrix +from sklearn.metrics import classification_report + +# Impor dari file lain dalam proyek +from model import create_model +from dataset import get_dataloaders +from utils import save_model, save_plots + +# --- 1. KONFIGURASI & HYPERPARAMETERS --- +# Sebagian besar diambil dari rekomendasi metodologi Anda +DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu' +DATA_DIR = 'data' # Pastikan folder 'data' hasil prepare_dataset.py ada di sini +OUTPUT_DIR = 'outputs/models' # Folder untuk menyimpan model dan plot +IMAGE_SIZE = 224 # Ukuran input untuk ViT (bisa 224x224 atau 384x384) +BATCH_SIZE = 32 +EPOCHS = 30 # Total epoch, tapi kita akan pakai Early Stopping +LEARNING_RATE_HEAD = 1e-3 # LR untuk melatih classifier head saja +LEARNING_RATE_FINETUNE = 5e-5 # LR untuk fine-tuning seluruh model +WEIGHT_DECAY = 0.01 +MODEL_NAME = 'vit_mae_cataract_classifier.pth' +NUM_CLASSES = 4 # Sesuaikan dengan jumlah kelas katarak Anda + +# --- 2. FUNGSI UNTUK MENGATASI CLASS IMBALANCE --- +def calculate_class_weights(dataset): + """ + Menghitung bobot untuk setiap kelas berdasarkan frekuensinya (1 / sqrt(freq)). + Ini adalah implementasi dari "Weighted loss" di metodologi Anda. 
+ """ + # Hitung jumlah sampel per kelas + class_counts = np.bincount(dataset.targets) + # Hitung bobot: 1 / sqrt(frekuensi) + class_weights = 1. / np.sqrt(class_counts) + # Normalisasi bobot + class_weights = class_weights / np.sum(class_weights) + return torch.tensor(class_weights, dtype=torch.float).to(DEVICE) + +# --- 3. FUNGSI TRAINING & VALIDASI --- +def train_one_epoch(model, dataloader, optimizer, criterion, device): + model.train() + running_loss = 0.0 + correct_predictions = 0 + total_samples = 0 + + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Training"): + images, labels = images.to(device), labels.to(device) + + optimizer.zero_grad() + outputs = model(images) + loss = criterion(outputs, labels) + + loss.backward() + optimizer.step() + + running_loss += loss.item() * images.size(0) + _, preds = torch.max(outputs, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + return epoch_loss, epoch_acc + +def validate_one_epoch(model, dataloader, criterion, device): + model.eval() + running_loss = 0.0 + correct_predictions = 0 + total_samples = 0 + + all_preds = [] + all_labels = [] + + with torch.no_grad(): + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Validating"): + images, labels = images.to(device), labels.to(device) + + outputs = model(images) + loss = criterion(outputs, labels) + + running_loss += loss.item() * images.size(0) + _, preds = torch.max(outputs, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + all_preds.extend(preds.cpu().numpy()) + all_labels.extend(labels.cpu().numpy()) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + + # Cetak laporan klasifikasi (precision, recall, f1-score per kelas) + print("\n--- Laporan Klasifikasi 
Validasi ---") + print(classification_report(all_labels, all_preds, target_names=[str(i) for i in range(NUM_CLASSES)])) + + return epoch_loss, epoch_acc + +# --- 4. SCRIPT UTAMA --- +if __name__ == '__main__': + # A. Persiapan & Split Data + # Kita asumsikan ini sudah dilakukan oleh prepare_dataset.py + # get_dataloaders akan mengambil data dari folder 'data/train' dan 'data/valid' + train_loader, valid_loader, classes = get_dataloaders(DATA_DIR, BATCH_SIZE, IMAGE_SIZE) + + # B. Arsitektur & Pretraining + # Menggunakan ViT pretrained (MAE) dari model.py + model = create_model(num_classes=NUM_CLASSES).to(DEVICE) + + # C. Menangani Imbalance + # Hitung bobot kelas dari dataset training + class_weights = calculate_class_weights(train_loader.dataset) + print(f"Class weights untuk mengatasi imbalance: {class_weights}") + # Gunakan Weighted Cross-Entropy Loss + criterion = nn.CrossEntropyLoss(weight=class_weights) + + # D. Strategi Fine-tune + # --- TAHAP 1: FREEZE BACKBONE, LATIH HEAD --- + print("\n--- TAHAP 1: Melatih Classifier Head ---") + # Bekukan semua layer kecuali head + for param in model.parameters(): + param.requires_grad = False + for param in model.head.parameters(): + param.requires_grad = True + + optimizer_head = optim.AdamW(model.head.parameters(), lr=LEARNING_RATE_HEAD, weight_decay=WEIGHT_DECAY) + + # Latih head selama beberapa epoch + for epoch in range(5): # Misal 5 epoch untuk head + print(f"Epoch Head {epoch+1}/5") + train_one_epoch(model, train_loader, optimizer_head, criterion, DEVICE) + validate_one_epoch(model, valid_loader, criterion, DEVICE) + + # --- TAHAP 2: UNFREEZE & FINE-TUNE SELURUH MODEL --- + print("\n--- TAHAP 2: Fine-tuning Seluruh Model ---") + # Cairkan semua layer + for param in model.parameters(): + param.requires_grad = True + + optimizer_finetune = optim.AdamW(model.parameters(), lr=LEARNING_RATE_FINETUNE, weight_decay=WEIGHT_DECAY) + # Gunakan scheduler untuk menyesuaikan learning rate + scheduler = 
optim.lr_scheduler.CosineAnnealingLR(optimizer_finetune, T_max=EPOCHS, eta_min=1e-7) + + # E. Loop Training Utama dengan Regularisasi & Hyperparams + history = { + 'train_loss': [], 'train_acc': [], + 'valid_loss': [], 'valid_acc': [] + } + best_valid_acc = 0.0 + + for epoch in range(EPOCHS): + print(f"Epoch {epoch+1}/{EPOCHS}") + + train_loss, train_acc = train_one_epoch(model, train_loader, optimizer_finetune, criterion, DEVICE) + valid_loss, valid_acc = validate_one_epoch(model, valid_loader, criterion, DEVICE) + + scheduler.step() # Update learning rate + + print(f" Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}") + print(f" Valid Loss: {valid_loss:.4f}, Valid Acc: {valid_acc:.4f}") + + # Simpan riwayat + history['train_loss'].append(train_loss) + history['train_acc'].append(train_acc) + history['valid_loss'].append(valid_loss) + history['valid_acc'].append(valid_acc) + + # Simpan model dengan akurasi validasi terbaik (Early stopping sederhana) + if valid_acc > best_valid_acc: + print(f"Validasi akurasi meningkat dari {best_valid_acc:.4f} ke {valid_acc:.4f}. Menyimpan model...") + save_model(epoch, model, optimizer_finetune, criterion, f"{OUTPUT_DIR}/{MODEL_NAME}") + best_valid_acc = valid_acc + + # F. 
Evaluasi & Reporting + # Simpan plot akurasi dan loss + save_plots( + history['train_acc'], history['valid_acc'], + history['train_loss'], history['valid_loss'], + OUTPUT_DIR + ) + + print("\n--- Selesai ---") + print(f"Model terbaik disimpan di {OUTPUT_DIR}/{MODEL_NAME}") \ No newline at end of file diff --git a/.history/src/train_20250924213657.py b/.history/src/train_20250924213657.py new file mode 100644 index 0000000000000000000000000000000000000000..6cbb3c571eb2e126d36ef15f3ec6d751651a7a77 --- /dev/null +++ b/.history/src/train_20250924213657.py @@ -0,0 +1,188 @@ +# src/train.py + +import torch +import torch.nn as nn +import torch.optim as optim +from tqdm import tqdm +import numpy as np +import seaborn as sns +from sklearn.metrics import confusion_matrix +from sklearn.metrics import classification_report + +# Impor dari file lain dalam proyek +from model import create_model +from dataset import get_dataloaders +from utils import save_model, save_plots + +# --- 1. KONFIGURASI & HYPERPARAMETERS --- +# Sebagian besar diambil dari rekomendasi metodologi Anda +DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu' +DATA_DIR = 'data' # Pastikan folder 'data' hasil prepare_dataset.py ada di sini +OUTPUT_DIR = 'outputs/models' # Folder untuk menyimpan model dan plot +IMAGE_SIZE = 224 # Ukuran input untuk ViT (bisa 224x224 atau 384x384) +BATCH_SIZE = 32 +EPOCHS = 30 # Total epoch, tapi kita akan pakai Early Stopping +LEARNING_RATE_HEAD = 1e-3 # LR untuk melatih classifier head saja +LEARNING_RATE_FINETUNE = 5e-5 # LR untuk fine-tuning seluruh model +WEIGHT_DECAY = 0.01 +MODEL_NAME = 'vit_mae_cataract_classifier.pth' +NUM_CLASSES = 4 # Sesuaikan dengan jumlah kelas katarak Anda + +# --- 2. FUNGSI UNTUK MENGATASI CLASS IMBALANCE --- +def calculate_class_weights(dataset): + """ + Menghitung bobot untuk setiap kelas berdasarkan frekuensinya (1 / sqrt(freq)). + Ini adalah implementasi dari "Weighted loss" di metodologi Anda. 
+ """ + # Hitung jumlah sampel per kelas + class_counts = np.bincount(dataset.targets) + # Hitung bobot: 1 / sqrt(frekuensi) + class_weights = 1. / np.sqrt(class_counts) + # Normalisasi bobot + class_weights = class_weights / np.sum(class_weights) + return torch.tensor(class_weights, dtype=torch.float).to(DEVICE) + +# --- 3. FUNGSI TRAINING & VALIDASI --- +def train_one_epoch(model, dataloader, optimizer, criterion, device): + model.train() + running_loss = 0.0 + correct_predictions = 0 + total_samples = 0 + + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Training"): + images, labels = images.to(device), labels.to(device) + + optimizer.zero_grad() + outputs = model(images) + loss = criterion(outputs, labels) + + loss.backward() + optimizer.step() + + running_loss += loss.item() * images.size(0) + _, preds = torch.max(outputs, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + return epoch_loss, epoch_acc + +def validate_one_epoch(model, dataloader, criterion, device): + model.eval() + running_loss = 0.0 + correct_predictions = 0 + total_samples = 0 + + all_preds = [] + all_labels = [] + + with torch.no_grad(): + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Validating"): + images, labels = images.to(device), labels.to(device) + + outputs = model(images) + loss = criterion(outputs, labels) + + running_loss += loss.item() * images.size(0) + _, preds = torch.max(outputs, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + all_preds.extend(preds.cpu().numpy()) + all_labels.extend(labels.cpu().numpy()) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + + # Cetak laporan klasifikasi (precision, recall, f1-score per kelas) + print("\n--- Laporan Klasifikasi 
Validasi ---") + print(classification_report(all_labels, all_preds, target_names=[str(i) for i in range(NUM_CLASSES)])) + + return epoch_loss, epoch_acc + +# --- 4. SCRIPT UTAMA --- +if __name__ == '__main__': + # A. Persiapan & Split Data + # Kita asumsikan ini sudah dilakukan oleh prepare_dataset.py + # get_dataloaders akan mengambil data dari folder 'data/train' dan 'data/valid' + train_loader, valid_loader, classes = get_dataloaders(DATA_DIR, BATCH_SIZE, IMAGE_SIZE) + + # B. Arsitektur & Pretraining + # Menggunakan ViT pretrained (MAE) dari model.py + model = create_model(num_classes=NUM_CLASSES).to(DEVICE) + + # C. Menangani Imbalance + # Hitung bobot kelas dari dataset training + class_weights = calculate_class_weights(train_loader.dataset) + print(f"Class weights untuk mengatasi imbalance: {class_weights}") + # Gunakan Weighted Cross-Entropy Loss + criterion = nn.CrossEntropyLoss(weight=class_weights) + + # D. Strategi Fine-tune + # --- TAHAP 1: FREEZE BACKBONE, LATIH HEAD --- + print("\n--- TAHAP 1: Melatih Classifier Head ---") + # Bekukan semua layer kecuali head + for param in model.parameters(): + param.requires_grad = False + for param in model.head.parameters(): + param.requires_grad = True + + optimizer_head = optim.AdamW(model.head.parameters(), lr=LEARNING_RATE_HEAD, weight_decay=WEIGHT_DECAY) + + # Latih head selama beberapa epoch + for epoch in range(5): # Misal 5 epoch untuk head + print(f"Epoch Head {epoch+1}/5") + train_one_epoch(model, train_loader, optimizer_head, criterion, DEVICE) + validate_one_epoch(model, valid_loader, criterion, DEVICE) + + # --- TAHAP 2: UNFREEZE & FINE-TUNE SELURUH MODEL --- + print("\n--- TAHAP 2: Fine-tuning Seluruh Model ---") + # Cairkan semua layer + for param in model.parameters(): + param.requires_grad = True + + optimizer_finetune = optim.AdamW(model.parameters(), lr=LEARNING_RATE_FINETUNE, weight_decay=WEIGHT_DECAY) + # Gunakan scheduler untuk menyesuaikan learning rate + scheduler = 
optim.lr_scheduler.CosineAnnealingLR(optimizer_finetune, T_max=EPOCHS, eta_min=1e-7) + + # E. Loop Training Utama dengan Regularisasi & Hyperparams + history = { + 'train_loss': [], 'train_acc': [], + 'valid_loss': [], 'valid_acc': [] + } + best_valid_acc = 0.0 + + for epoch in range(EPOCHS): + print(f"Epoch {epoch+1}/{EPOCHS}") + + train_loss, train_acc = train_one_epoch(model, train_loader, optimizer_finetune, criterion, DEVICE) + valid_loss, valid_acc = validate_one_epoch(model, valid_loader, criterion, DEVICE) + + scheduler.step() # Update learning rate + + print(f" Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}") + print(f" Valid Loss: {valid_loss:.4f}, Valid Acc: {valid_acc:.4f}") + + # Simpan riwayat + history['train_loss'].append(train_loss) + history['train_acc'].append(train_acc) + history['valid_loss'].append(valid_loss) + history['valid_acc'].append(valid_acc) + + # Simpan model dengan akurasi validasi terbaik (Early stopping sederhana) + if valid_acc > best_valid_acc: + print(f"Validasi akurasi meningkat dari {best_valid_acc:.4f} ke {valid_acc:.4f}. Menyimpan model...") + save_model(epoch, model, optimizer_finetune, criterion, f"{OUTPUT_DIR}/{MODEL_NAME}") + best_valid_acc = valid_acc + + # F. 
Evaluasi & Reporting + # Simpan plot akurasi dan loss + save_plots( + history['train_acc'], history['valid_acc'], + history['train_loss'], history['valid_loss'], + OUTPUT_DIR + ) + + print("\n--- Selesai ---") + print(f"Model terbaik disimpan di {OUTPUT_DIR}/{MODEL_NAME}") \ No newline at end of file diff --git a/.history/src/train_20250924213705.py b/.history/src/train_20250924213705.py new file mode 100644 index 0000000000000000000000000000000000000000..6cbb3c571eb2e126d36ef15f3ec6d751651a7a77 --- /dev/null +++ b/.history/src/train_20250924213705.py @@ -0,0 +1,188 @@ +# src/train.py + +import torch +import torch.nn as nn +import torch.optim as optim +from tqdm import tqdm +import numpy as np +import seaborn as sns +from sklearn.metrics import confusion_matrix +from sklearn.metrics import classification_report + +# Impor dari file lain dalam proyek +from model import create_model +from dataset import get_dataloaders +from utils import save_model, save_plots + +# --- 1. KONFIGURASI & HYPERPARAMETERS --- +# Sebagian besar diambil dari rekomendasi metodologi Anda +DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu' +DATA_DIR = 'data' # Pastikan folder 'data' hasil prepare_dataset.py ada di sini +OUTPUT_DIR = 'outputs/models' # Folder untuk menyimpan model dan plot +IMAGE_SIZE = 224 # Ukuran input untuk ViT (bisa 224x224 atau 384x384) +BATCH_SIZE = 32 +EPOCHS = 30 # Total epoch, tapi kita akan pakai Early Stopping +LEARNING_RATE_HEAD = 1e-3 # LR untuk melatih classifier head saja +LEARNING_RATE_FINETUNE = 5e-5 # LR untuk fine-tuning seluruh model +WEIGHT_DECAY = 0.01 +MODEL_NAME = 'vit_mae_cataract_classifier.pth' +NUM_CLASSES = 4 # Sesuaikan dengan jumlah kelas katarak Anda + +# --- 2. FUNGSI UNTUK MENGATASI CLASS IMBALANCE --- +def calculate_class_weights(dataset): + """ + Menghitung bobot untuk setiap kelas berdasarkan frekuensinya (1 / sqrt(freq)). + Ini adalah implementasi dari "Weighted loss" di metodologi Anda. 
+ """ + # Hitung jumlah sampel per kelas + class_counts = np.bincount(dataset.targets) + # Hitung bobot: 1 / sqrt(frekuensi) + class_weights = 1. / np.sqrt(class_counts) + # Normalisasi bobot + class_weights = class_weights / np.sum(class_weights) + return torch.tensor(class_weights, dtype=torch.float).to(DEVICE) + +# --- 3. FUNGSI TRAINING & VALIDASI --- +def train_one_epoch(model, dataloader, optimizer, criterion, device): + model.train() + running_loss = 0.0 + correct_predictions = 0 + total_samples = 0 + + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Training"): + images, labels = images.to(device), labels.to(device) + + optimizer.zero_grad() + outputs = model(images) + loss = criterion(outputs, labels) + + loss.backward() + optimizer.step() + + running_loss += loss.item() * images.size(0) + _, preds = torch.max(outputs, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + return epoch_loss, epoch_acc + +def validate_one_epoch(model, dataloader, criterion, device): + model.eval() + running_loss = 0.0 + correct_predictions = 0 + total_samples = 0 + + all_preds = [] + all_labels = [] + + with torch.no_grad(): + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Validating"): + images, labels = images.to(device), labels.to(device) + + outputs = model(images) + loss = criterion(outputs, labels) + + running_loss += loss.item() * images.size(0) + _, preds = torch.max(outputs, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + all_preds.extend(preds.cpu().numpy()) + all_labels.extend(labels.cpu().numpy()) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + + # Cetak laporan klasifikasi (precision, recall, f1-score per kelas) + print("\n--- Laporan Klasifikasi 
Validasi ---") + print(classification_report(all_labels, all_preds, target_names=[str(i) for i in range(NUM_CLASSES)])) + + return epoch_loss, epoch_acc + +# --- 4. SCRIPT UTAMA --- +if __name__ == '__main__': + # A. Persiapan & Split Data + # Kita asumsikan ini sudah dilakukan oleh prepare_dataset.py + # get_dataloaders akan mengambil data dari folder 'data/train' dan 'data/valid' + train_loader, valid_loader, classes = get_dataloaders(DATA_DIR, BATCH_SIZE, IMAGE_SIZE) + + # B. Arsitektur & Pretraining + # Menggunakan ViT pretrained (MAE) dari model.py + model = create_model(num_classes=NUM_CLASSES).to(DEVICE) + + # C. Menangani Imbalance + # Hitung bobot kelas dari dataset training + class_weights = calculate_class_weights(train_loader.dataset) + print(f"Class weights untuk mengatasi imbalance: {class_weights}") + # Gunakan Weighted Cross-Entropy Loss + criterion = nn.CrossEntropyLoss(weight=class_weights) + + # D. Strategi Fine-tune + # --- TAHAP 1: FREEZE BACKBONE, LATIH HEAD --- + print("\n--- TAHAP 1: Melatih Classifier Head ---") + # Bekukan semua layer kecuali head + for param in model.parameters(): + param.requires_grad = False + for param in model.head.parameters(): + param.requires_grad = True + + optimizer_head = optim.AdamW(model.head.parameters(), lr=LEARNING_RATE_HEAD, weight_decay=WEIGHT_DECAY) + + # Latih head selama beberapa epoch + for epoch in range(5): # Misal 5 epoch untuk head + print(f"Epoch Head {epoch+1}/5") + train_one_epoch(model, train_loader, optimizer_head, criterion, DEVICE) + validate_one_epoch(model, valid_loader, criterion, DEVICE) + + # --- TAHAP 2: UNFREEZE & FINE-TUNE SELURUH MODEL --- + print("\n--- TAHAP 2: Fine-tuning Seluruh Model ---") + # Cairkan semua layer + for param in model.parameters(): + param.requires_grad = True + + optimizer_finetune = optim.AdamW(model.parameters(), lr=LEARNING_RATE_FINETUNE, weight_decay=WEIGHT_DECAY) + # Gunakan scheduler untuk menyesuaikan learning rate + scheduler = 
optim.lr_scheduler.CosineAnnealingLR(optimizer_finetune, T_max=EPOCHS, eta_min=1e-7) + + # E. Loop Training Utama dengan Regularisasi & Hyperparams + history = { + 'train_loss': [], 'train_acc': [], + 'valid_loss': [], 'valid_acc': [] + } + best_valid_acc = 0.0 + + for epoch in range(EPOCHS): + print(f"Epoch {epoch+1}/{EPOCHS}") + + train_loss, train_acc = train_one_epoch(model, train_loader, optimizer_finetune, criterion, DEVICE) + valid_loss, valid_acc = validate_one_epoch(model, valid_loader, criterion, DEVICE) + + scheduler.step() # Update learning rate + + print(f" Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}") + print(f" Valid Loss: {valid_loss:.4f}, Valid Acc: {valid_acc:.4f}") + + # Simpan riwayat + history['train_loss'].append(train_loss) + history['train_acc'].append(train_acc) + history['valid_loss'].append(valid_loss) + history['valid_acc'].append(valid_acc) + + # Simpan model dengan akurasi validasi terbaik (Early stopping sederhana) + if valid_acc > best_valid_acc: + print(f"Validasi akurasi meningkat dari {best_valid_acc:.4f} ke {valid_acc:.4f}. Menyimpan model...") + save_model(epoch, model, optimizer_finetune, criterion, f"{OUTPUT_DIR}/{MODEL_NAME}") + best_valid_acc = valid_acc + + # F. 
Evaluasi & Reporting + # Simpan plot akurasi dan loss + save_plots( + history['train_acc'], history['valid_acc'], + history['train_loss'], history['valid_loss'], + OUTPUT_DIR + ) + + print("\n--- Selesai ---") + print(f"Model terbaik disimpan di {OUTPUT_DIR}/{MODEL_NAME}") \ No newline at end of file diff --git a/.history/src/train_20250925074904.py b/.history/src/train_20250925074904.py new file mode 100644 index 0000000000000000000000000000000000000000..69b9907f9f9b9addc4ebcb4a051a15c64bd26949 --- /dev/null +++ b/.history/src/train_20250925074904.py @@ -0,0 +1,188 @@ +# src/train.py + +import torch +import torch.nn as nn +import torch.optim as optim +from tqdm import tqdm +import numpy as np +import seaborn as sns +from sklearn.metrics import confusion_matrix +from sklearn.metrics import classification_report + +# Impor dari file lain dalam proyek +from model import create_model +from dataset import get_dataloaders +from utils import save_model, save_plots + +# --- 1. KONFIGURASI & HYPERPARAMETERS --- +# Sebagian besar diambil dari rekomendasi metodologi Anda +DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu' +DATA_DIR = 'data' # Pastikan folder 'data' hasil prepare_dataset.py ada di sini +OUTPUT_DIR = 'outputs/models' # Folder untuk menyimpan model dan plot +IMAGE_SIZE = 224 # Ukuran input untuk ViT (bisa 224x224 atau 384x384) +BATCH_SIZE = 32 +EPOCHS = 30 # Total epoch, tapi kita akan pakai Early Stopping +LEARNING_RATE_HEAD = 1e-3 # LR untuk melatih classifier head saja +LEARNING_RATE_FINETUNE = 5e-5 # LR untuk fine-tuning seluruh model +WEIGHT_DECAY = 0.01 +MODEL_NAME = 'vit_mae_cataract_classifier.pth' +NUM_CLASSES = 4 # Sesuaikan dengan jumlah kelas katarak Anda + +# --- 2. FUNGSI UNTUK MENGATASI CLASS IMBALANCE --- +def calculate_class_weights(dataset): + """ + Menghitung bobot untuk setiap kelas berdasarkan frekuensinya (1 / sqrt(freq)). + Ini adalah implementasi dari "Weighted loss" di metodologi Anda. 
+ """ + # Hitung jumlah sampel per kelas + class_counts = np.bincount(dataset.targets) + # Hitung bobot: 1 / sqrt(frekuensi) + class_weights = 1. / np.sqrt(class_counts) + # Normalisasi bobot + class_weights = class_weights / np.sum(class_weights) + return torch.tensor(class_weights, dtype=torch.float).to(DEVICE) + +# --- 3. FUNGSI TRAINING & VALIDASI --- +def train_one_epoch(model, dataloader, optimizer, criterion, device): + model.train() + running_loss = 0.0 + correct_predictions = 0 + total_samples = 0 + + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Training"): + images, labels = images.to(device), labels.to(device) + + optimizer.zero_grad() + outputs = model(images) + loss = criterion(outputs, labels) + + loss.backward() + optimizer.step() + + running_loss += loss.item() * images.size(0) + _, preds = torch.max(outputs, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + return epoch_loss, epoch_acc + +def validate_one_epoch(model, dataloader, criterion, device): + model.eval() + running_loss = 0.0 + correct_predictions = 0 + total_samples = 0 + + all_preds = [] + all_labels = [] + + with torch.no_grad(): + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Validating"): + images, labels = images.to(device), labels.to(device) + + outputs = model(images) + loss = criterion(outputs, labels) + + running_loss += loss.item() * images.size(0) + _, preds = torch.max(outputs, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + all_preds.extend(preds.cpu().numpy()) + all_labels.extend(labels.cpu().numpy()) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + + # Cetak laporan klasifikasi (precision, recall, f1-score per kelas) + print("\n--- Laporan Klasifikasi 
Validasi ---") + print(classification_report(all_labels, all_preds, target_names=[str(i) for i in range(NUM_CLASSES)])) + + return epoch_loss, epoch_acc + +# --- 4. SCRIPT UTAMA --- +if __name__ == '__main__': + # A. Persiapan & Split Data + # Kita asumsikan ini sudah dilakukan oleh prepare_dataset.py + # get_dataloaders akan mengambil data dari folder 'data/train' dan 'data/valid' + train_loader, valid_loader, classes = get_dataloaders(DATA_DIR, BATCH_SIZE, IMAGE_SIZE) + + # B. Arsitektur & Pretraining + # Menggunakan ViT pretrained (MAE) dari model.py + model = create_model(num_classes=NUM_CLASSES).to(DEVICE) + + # C. Menangani Imbalance + # Hitung bobot kelas dari dataset training + class_weights = calculate_class_weights(train_loader.dataset) + print(f"Class weights untuk mengatasi imbalance: {class_weights}") + # Gunakan Weighted Cross-Entropy Loss + criterion = nn.CrossEntropyLoss(weight=class_weights) + + # D. Strategi Fine-tune + # --- TAHAP 1: FREEZE BACKBONE, LATIH HEAD --- + print("\n--- TAHAP 1: Melatih Classifier Head ---") + # Bekukan semua layer kecuali head + for param in model.parameters(): + param.requires_grad = False + for param in model.head.parameters(): + param.requires_grad = True + + optimizer_head = optim.AdamW(model.head.parameters(), lr=LEARNING_RATE_HEAD, weight_decay=WEIGHT_DECAY) + + # Latih head selama beberapa epoch + for epoch in range(5): # Misal 5 epoch untuk head + print(f"Epoch Head {epoch+1}/5") + train_one_epoch(model, train_loader, optimizer_head, criterion, DEVICE) + validate_one_epoch(model, valid_loader, criterion, DEVICE) + + # --- TAHAP 2: UNFREEZE & FINE-TUNE SELURUH MODEL --- + print("\n--- TAHAP 2: Fine-tuning Seluruh Model ---") + # Cairkan semua layer + for param in model.parameters(): + param.requires_grad = True + + optimizer_finetune = optim.AdamW(model.parameters(), lr=LEARNING_RATE_FINETUNE, weight_decay=WEIGHT_DECAY) + # Gunakan scheduler untuk menyesuaikan learning rate + scheduler = 
optim.lr_scheduler.CosineAnnealingLR(optimizer_finetune, T_max=EPOCHS, eta_min=1e-7) + + # E. Loop Training Utama dengan Regularisasi & Hyperparams + history = { + 'train_loss': [], 'train_acc': [], + 'valid_loss': [], 'valid_acc': [] + } + best_valid_acc = 0.0 + + for epoch in range(EPOCHS): + print(f"Epoch {epoch+1}/{EPOCHS}") + + train_loss, train_acc = train_one_epoch(model, train_loader, optimizer_finetune, criterion, DEVICE) + valid_loss, valid_acc = validate_one_epoch(model, valid_loader, criterion, DEVICE) + + scheduler.step() # Update learning rate + + print(f" Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}") + print(f" Valid Loss: {valid_loss:.4f}, Valid Acc: {valid_acc:.4f}") + + # Simpan riwayat + history['train_loss'].append(train_loss) + history['train_acc'].append(train_acc) + history['valid_loss'].append(valid_loss) + history['valid_acc'].append(valid_acc) + + # Simpan model dengan akurasi validasi terbaik (Early stopping sederhana) + if valid_acc > best_valid_acc: + print(f"Validasi akurasi meningkat dari {best_valid_acc:.4f} ke {valid_acc:.4f}. Menyimpan model...") + save_model(epoch, model, optimizer_finetune, criterion, f"{OUTPUT_DIR}/{MODEL_NAME}") + best_valid_acc = valid_acc + + # F. 
Evaluasi & Reporting + + save_plots( + history['train_acc'], history['valid_acc'], + history['train_loss'], history['valid_loss'], + OUTPUT_DIR + ) + + print("\n--- Selesai ---") + print(f"Model terbaik disimpan di {OUTPUT_DIR}/{MODEL_NAME}") \ No newline at end of file diff --git a/.history/src/train_20250925074906.py b/.history/src/train_20250925074906.py new file mode 100644 index 0000000000000000000000000000000000000000..d836f1ecf32a1ed241fb39d52a42ab02a04cac08 --- /dev/null +++ b/.history/src/train_20250925074906.py @@ -0,0 +1,187 @@ +# src/train.py + +import torch +import torch.nn as nn +import torch.optim as optim +from tqdm import tqdm +import numpy as np +import seaborn as sns +from sklearn.metrics import confusion_matrix +from sklearn.metrics import classification_report + +# Impor dari file lain dalam proyek +from model import create_model +from dataset import get_dataloaders +from utils import save_model, save_plots + +# --- 1. KONFIGURASI & HYPERPARAMETERS --- +# Sebagian besar diambil dari rekomendasi metodologi Anda +DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu' +DATA_DIR = 'data' # Pastikan folder 'data' hasil prepare_dataset.py ada di sini +OUTPUT_DIR = 'outputs/models' # Folder untuk menyimpan model dan plot +IMAGE_SIZE = 224 # Ukuran input untuk ViT (bisa 224x224 atau 384x384) +BATCH_SIZE = 32 +EPOCHS = 30 # Total epoch, tapi kita akan pakai Early Stopping +LEARNING_RATE_HEAD = 1e-3 # LR untuk melatih classifier head saja +LEARNING_RATE_FINETUNE = 5e-5 # LR untuk fine-tuning seluruh model +WEIGHT_DECAY = 0.01 +MODEL_NAME = 'vit_mae_cataract_classifier.pth' +NUM_CLASSES = 4 # Sesuaikan dengan jumlah kelas katarak Anda + +# --- 2. FUNGSI UNTUK MENGATASI CLASS IMBALANCE --- +def calculate_class_weights(dataset): + """ + Menghitung bobot untuk setiap kelas berdasarkan frekuensinya (1 / sqrt(freq)). + Ini adalah implementasi dari "Weighted loss" di metodologi Anda. 
+ """ + # Hitung jumlah sampel per kelas + class_counts = np.bincount(dataset.targets) + # Hitung bobot: 1 / sqrt(frekuensi) + class_weights = 1. / np.sqrt(class_counts) + # Normalisasi bobot + class_weights = class_weights / np.sum(class_weights) + return torch.tensor(class_weights, dtype=torch.float).to(DEVICE) + +# --- 3. FUNGSI TRAINING & VALIDASI --- +def train_one_epoch(model, dataloader, optimizer, criterion, device): + model.train() + running_loss = 0.0 + correct_predictions = 0 + total_samples = 0 + + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Training"): + images, labels = images.to(device), labels.to(device) + + optimizer.zero_grad() + outputs = model(images) + loss = criterion(outputs, labels) + + loss.backward() + optimizer.step() + + running_loss += loss.item() * images.size(0) + _, preds = torch.max(outputs, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + return epoch_loss, epoch_acc + +def validate_one_epoch(model, dataloader, criterion, device): + model.eval() + running_loss = 0.0 + correct_predictions = 0 + total_samples = 0 + + all_preds = [] + all_labels = [] + + with torch.no_grad(): + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Validating"): + images, labels = images.to(device), labels.to(device) + + outputs = model(images) + loss = criterion(outputs, labels) + + running_loss += loss.item() * images.size(0) + _, preds = torch.max(outputs, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + all_preds.extend(preds.cpu().numpy()) + all_labels.extend(labels.cpu().numpy()) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + + # Cetak laporan klasifikasi (precision, recall, f1-score per kelas) + print("\n--- Laporan Klasifikasi 
Validasi ---") + print(classification_report(all_labels, all_preds, target_names=[str(i) for i in range(NUM_CLASSES)])) + + return epoch_loss, epoch_acc + +# --- 4. SCRIPT UTAMA --- +if __name__ == '__main__': + # A. Persiapan & Split Data + # Kita asumsikan ini sudah dilakukan oleh prepare_dataset.py + # get_dataloaders akan mengambil data dari folder 'data/train' dan 'data/valid' + train_loader, valid_loader, classes = get_dataloaders(DATA_DIR, BATCH_SIZE, IMAGE_SIZE) + + # B. Arsitektur & Pretraining + # Menggunakan ViT pretrained (MAE) dari model.py + model = create_model(num_classes=NUM_CLASSES).to(DEVICE) + + # C. Menangani Imbalance + # Hitung bobot kelas dari dataset training + class_weights = calculate_class_weights(train_loader.dataset) + print(f"Class weights untuk mengatasi imbalance: {class_weights}") + # Gunakan Weighted Cross-Entropy Loss + criterion = nn.CrossEntropyLoss(weight=class_weights) + + # D. Strategi Fine-tune + # --- TAHAP 1: FREEZE BACKBONE, LATIH HEAD --- + print("\n--- TAHAP 1: Melatih Classifier Head ---") + # Bekukan semua layer kecuali head + for param in model.parameters(): + param.requires_grad = False + for param in model.head.parameters(): + param.requires_grad = True + + optimizer_head = optim.AdamW(model.head.parameters(), lr=LEARNING_RATE_HEAD, weight_decay=WEIGHT_DECAY) + + # Latih head selama beberapa epoch + for epoch in range(5): # Misal 5 epoch untuk head + print(f"Epoch Head {epoch+1}/5") + train_one_epoch(model, train_loader, optimizer_head, criterion, DEVICE) + validate_one_epoch(model, valid_loader, criterion, DEVICE) + + # --- TAHAP 2: UNFREEZE & FINE-TUNE SELURUH MODEL --- + print("\n--- TAHAP 2: Fine-tuning Seluruh Model ---") + # Cairkan semua layer + for param in model.parameters(): + param.requires_grad = True + + optimizer_finetune = optim.AdamW(model.parameters(), lr=LEARNING_RATE_FINETUNE, weight_decay=WEIGHT_DECAY) + # Gunakan scheduler untuk menyesuaikan learning rate + scheduler = 
optim.lr_scheduler.CosineAnnealingLR(optimizer_finetune, T_max=EPOCHS, eta_min=1e-7) + + # E. Loop Training Utama dengan Regularisasi & Hyperparams + history = { + 'train_loss': [], 'train_acc': [], + 'valid_loss': [], 'valid_acc': [] + } + best_valid_acc = 0.0 + + for epoch in range(EPOCHS): + print(f"Epoch {epoch+1}/{EPOCHS}") + + train_loss, train_acc = train_one_epoch(model, train_loader, optimizer_finetune, criterion, DEVICE) + valid_loss, valid_acc = validate_one_epoch(model, valid_loader, criterion, DEVICE) + + scheduler.step() # Update learning rate + + print(f" Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}") + print(f" Valid Loss: {valid_loss:.4f}, Valid Acc: {valid_acc:.4f}") + + # Simpan riwayat + history['train_loss'].append(train_loss) + history['train_acc'].append(train_acc) + history['valid_loss'].append(valid_loss) + history['valid_acc'].append(valid_acc) + + # Simpan model dengan akurasi validasi terbaik (Early stopping sederhana) + if valid_acc > best_valid_acc: + print(f"Validasi akurasi meningkat dari {best_valid_acc:.4f} ke {valid_acc:.4f}. Menyimpan model...") + save_model(epoch, model, optimizer_finetune, criterion, f"{OUTPUT_DIR}/{MODEL_NAME}") + best_valid_acc = valid_acc + + # F. 
Evaluasi & Reporting + save_plots( + history['train_acc'], history['valid_acc'], + history['train_loss'], history['valid_loss'], + OUTPUT_DIR + ) + + print("\n--- Selesai ---") + print(f"Model terbaik disimpan di {OUTPUT_DIR}/{MODEL_NAME}") \ No newline at end of file diff --git a/.history/src/train_20250925074909.py b/.history/src/train_20250925074909.py new file mode 100644 index 0000000000000000000000000000000000000000..d149bf38e0fd1c8cafd37da9c81aca2e0b25b315 --- /dev/null +++ b/.history/src/train_20250925074909.py @@ -0,0 +1,187 @@ +# src/train.py + +import torch +import torch.nn as nn +import torch.optim as optim +from tqdm import tqdm +import numpy as np +import seaborn as sns +from sklearn.metrics import confusion_matrix +from sklearn.metrics import classification_report + +# Impor dari file lain dalam proyek +from model import create_model +from dataset import get_dataloaders +from utils import save_model, save_plots + +# --- 1. KONFIGURASI & HYPERPARAMETERS --- +# Sebagian besar diambil dari rekomendasi metodologi Anda +DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu' +DATA_DIR = 'data' # Pastikan folder 'data' hasil prepare_dataset.py ada di sini +OUTPUT_DIR = 'outputs/models' # Folder untuk menyimpan model dan plot +IMAGE_SIZE = 224 # Ukuran input untuk ViT (bisa 224x224 atau 384x384) +BATCH_SIZE = 32 +EPOCHS = 30 # Total epoch, tapi kita akan pakai Early Stopping +LEARNING_RATE_HEAD = 1e-3 # LR untuk melatih classifier head saja +LEARNING_RATE_FINETUNE = 5e-5 # LR untuk fine-tuning seluruh model +WEIGHT_DECAY = 0.01 +MODEL_NAME = 'vit_mae_cataract_classifier.pth' +NUM_CLASSES = 4 # Sesuaikan dengan jumlah kelas katarak Anda + +# --- 2. FUNGSI UNTUK MENGATASI CLASS IMBALANCE --- +def calculate_class_weights(dataset): + """ + Menghitung bobot untuk setiap kelas berdasarkan frekuensinya (1 / sqrt(freq)). + Ini adalah implementasi dari "Weighted loss" di metodologi Anda. 
+ """ + # Hitung jumlah sampel per kelas + class_counts = np.bincount(dataset.targets) + # Hitung bobot: 1 / sqrt(frekuensi) + class_weights = 1. / np.sqrt(class_counts) + # Normalisasi bobot + class_weights = class_weights / np.sum(class_weights) + return torch.tensor(class_weights, dtype=torch.float).to(DEVICE) + +# --- 3. FUNGSI TRAINING & VALIDASI --- +def train_one_epoch(model, dataloader, optimizer, criterion, device): + model.train() + running_loss = 0.0 + correct_predictions = 0 + total_samples = 0 + + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Training"): + images, labels = images.to(device), labels.to(device) + + optimizer.zero_grad() + outputs = model(images) + loss = criterion(outputs, labels) + + loss.backward() + optimizer.step() + + running_loss += loss.item() * images.size(0) + _, preds = torch.max(outputs, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + return epoch_loss, epoch_acc + +def validate_one_epoch(model, dataloader, criterion, device): + model.eval() + running_loss = 0.0 + correct_predictions = 0 + total_samples = 0 + + all_preds = [] + all_labels = [] + + with torch.no_grad(): + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Validating"): + images, labels = images.to(device), labels.to(device) + + outputs = model(images) + loss = criterion(outputs, labels) + + running_loss += loss.item() * images.size(0) + _, preds = torch.max(outputs, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + all_preds.extend(preds.cpu().numpy()) + all_labels.extend(labels.cpu().numpy()) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + + # Cetak laporan klasifikasi (precision, recall, f1-score per kelas) + print("\n--- Laporan Klasifikasi 
Validasi ---") + print(classification_report(all_labels, all_preds, target_names=[str(i) for i in range(NUM_CLASSES)])) + + return epoch_loss, epoch_acc + +# --- 4. SCRIPT UTAMA --- +if __name__ == '__main__': + # A. Persiapan & Split Data + # Kita asumsikan ini sudah dilakukan oleh prepare_dataset.py + # get_dataloaders akan mengambil data dari folder 'data/train' dan 'data/valid' + train_loader, valid_loader, classes = get_dataloaders(DATA_DIR, BATCH_SIZE, IMAGE_SIZE) + + # B. Arsitektur & Pretraining + # Menggunakan ViT pretrained (MAE) dari model.py + model = create_model(num_classes=NUM_CLASSES).to(DEVICE) + + # C. Menangani Imbalance + # Hitung bobot kelas dari dataset training + class_weights = calculate_class_weights(train_loader.dataset) + print(f"Class weights untuk mengatasi imbalance: {class_weights}") + # Gunakan Weighted Cross-Entropy Loss + criterion = nn.CrossEntropyLoss(weight=class_weights) + + # D. Strategi Fine-tune + # --- TAHAP 1: FREEZE BACKBONE, LATIH HEAD --- + print("\n--- TAHAP 1: Melatih Classifier Head ---") + # Bekukan semua layer kecuali head + for param in model.parameters(): + param.requires_grad = False + for param in model.head.parameters(): + param.requires_grad = True + + optimizer_head = optim.AdamW(model.head.parameters(), lr=LEARNING_RATE_HEAD, weight_decay=WEIGHT_DECAY) + + # Latih head selama beberapa epoch + for epoch in range(5): # Misal 5 epoch untuk head + print(f"Epoch Head {epoch+1}/5") + train_one_epoch(model, train_loader, optimizer_head, criterion, DEVICE) + validate_one_epoch(model, valid_loader, criterion, DEVICE) + + # --- TAHAP 2: UNFREEZE & FINE-TUNE SELURUH MODEL --- + print("\n--- TAHAP 2: Fine-tuning Seluruh Model ---") + # Cairkan semua layer + for param in model.parameters(): + param.requires_grad = True + + optimizer_finetune = optim.AdamW(model.parameters(), lr=LEARNING_RATE_FINETUNE, weight_decay=WEIGHT_DECAY) + # Gunakan scheduler untuk menyesuaikan learning rate + scheduler = 
optim.lr_scheduler.CosineAnnealingLR(optimizer_finetune, T_max=EPOCHS, eta_min=1e-7) + + # E. Loop Training Utama dengan Regularisasi & Hyperparams + history = { + 'train_loss': [], 'train_acc': [], + 'valid_loss': [], 'valid_acc': [] + } + best_valid_acc = 0.0 + + for epoch in range(EPOCHS): + print(f"Epoch {epoch+1}/{EPOCHS}") + + train_loss, train_acc = train_one_epoch(model, train_loader, optimizer_finetune, criterion, DEVICE) + valid_loss, valid_acc = validate_one_epoch(model, valid_loader, criterion, DEVICE) + + scheduler.step() # Update learning rate + + print(f" Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}") + print(f" Valid Loss: {valid_loss:.4f}, Valid Acc: {valid_acc:.4f}") + + # Simpan riwayat + history['train_loss'].append(train_loss) + history['train_acc'].append(train_acc) + history['valid_loss'].append(valid_loss) + history['valid_acc'].append(valid_acc) + + # Simpan model dengan akurasi validasi terbaik (Early stopping sederhana) + if valid_acc > best_valid_acc: + print(f"Validasi akurasi meningkat dari {best_valid_acc:.4f} ke {valid_acc:.4f}. 
Menyimpan model...") + save_model(epoch, model, optimizer_finetune, criterion, f"{OUTPUT_DIR}/{MODEL_NAME}") + best_valid_acc = valid_acc + + # Evaluasi & Reporting + save_plots( + history['train_acc'], history['valid_acc'], + history['train_loss'], history['valid_loss'], + OUTPUT_DIR + ) + + print("\n--- Selesai ---") + print(f"Model terbaik disimpan di {OUTPUT_DIR}/{MODEL_NAME}") \ No newline at end of file diff --git a/.history/src/train_20250925074913.py b/.history/src/train_20250925074913.py new file mode 100644 index 0000000000000000000000000000000000000000..a6cd67961140a854a3ed7b5cba054970c2f9bfac --- /dev/null +++ b/.history/src/train_20250925074913.py @@ -0,0 +1,187 @@ +# src/train.py + +import torch +import torch.nn as nn +import torch.optim as optim +from tqdm import tqdm +import numpy as np +import seaborn as sns +from sklearn.metrics import confusion_matrix +from sklearn.metrics import classification_report + +# Impor dari file lain dalam proyek +from model import create_model +from dataset import get_dataloaders +from utils import save_model, save_plots + +# --- 1. KONFIGURASI & HYPERPARAMETERS --- +# Sebagian besar diambil dari rekomendasi metodologi Anda +DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu' +DATA_DIR = 'data' # Pastikan folder 'data' hasil prepare_dataset.py ada di sini +OUTPUT_DIR = 'outputs/models' # Folder untuk menyimpan model dan plot +IMAGE_SIZE = 224 # Ukuran input untuk ViT (bisa 224x224 atau 384x384) +BATCH_SIZE = 32 +EPOCHS = 30 # Total epoch, tapi kita akan pakai Early Stopping +LEARNING_RATE_HEAD = 1e-3 # LR untuk melatih classifier head saja +LEARNING_RATE_FINETUNE = 5e-5 # LR untuk fine-tuning seluruh model +WEIGHT_DECAY = 0.01 +MODEL_NAME = 'vit_mae_cataract_classifier.pth' +NUM_CLASSES = 4 # Sesuaikan dengan jumlah kelas katarak Anda + +# --- 2. FUNGSI UNTUK MENGATASI CLASS IMBALANCE --- +def calculate_class_weights(dataset): + """ + Menghitung bobot untuk setiap kelas berdasarkan frekuensinya (1 / sqrt(freq)). 
+ Ini adalah implementasi dari "Weighted loss" di metodologi Anda. + """ + # Hitung jumlah sampel per kelas + class_counts = np.bincount(dataset.targets) + # Hitung bobot: 1 / sqrt(frekuensi) + class_weights = 1. / np.sqrt(class_counts) + # Normalisasi bobot + class_weights = class_weights / np.sum(class_weights) + return torch.tensor(class_weights, dtype=torch.float).to(DEVICE) + +# --- 3. FUNGSI TRAINING & VALIDASI --- +def train_one_epoch(model, dataloader, optimizer, criterion, device): + model.train() + running_loss = 0.0 + correct_predictions = 0 + total_samples = 0 + + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Training"): + images, labels = images.to(device), labels.to(device) + + optimizer.zero_grad() + outputs = model(images) + loss = criterion(outputs, labels) + + loss.backward() + optimizer.step() + + running_loss += loss.item() * images.size(0) + _, preds = torch.max(outputs, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + return epoch_loss, epoch_acc + +def validate_one_epoch(model, dataloader, criterion, device): + model.eval() + running_loss = 0.0 + correct_predictions = 0 + total_samples = 0 + + all_preds = [] + all_labels = [] + + with torch.no_grad(): + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Validating"): + images, labels = images.to(device), labels.to(device) + + outputs = model(images) + loss = criterion(outputs, labels) + + running_loss += loss.item() * images.size(0) + _, preds = torch.max(outputs, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + all_preds.extend(preds.cpu().numpy()) + all_labels.extend(labels.cpu().numpy()) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + + # Cetak laporan klasifikasi (precision, 
recall, f1-score per kelas) + print("\n--- Laporan Klasifikasi Validasi ---") + print(classification_report(all_labels, all_preds, target_names=[str(i) for i in range(NUM_CLASSES)])) + + return epoch_loss, epoch_acc + +# --- 4. SCRIPT UTAMA --- +if __name__ == '__main__': + # A. Persiapan & Split Data + # Kita asumsikan ini sudah dilakukan oleh prepare_dataset.py + # get_dataloaders akan mengambil data dari folder 'data/train' dan 'data/valid' + train_loader, valid_loader, classes = get_dataloaders(DATA_DIR, BATCH_SIZE, IMAGE_SIZE) + + # B. Arsitektur & Pretraining + # Menggunakan ViT pretrained (MAE) dari model.py + model = create_model(num_classes=NUM_CLASSES).to(DEVICE) + + # C. Menangani Imbalance + # Hitung bobot kelas dari dataset training + class_weights = calculate_class_weights(train_loader.dataset) + print(f"Class weights untuk mengatasi imbalance: {class_weights}") + # Gunakan Weighted Cross-Entropy Loss + criterion = nn.CrossEntropyLoss(weight=class_weights) + + # D. Strategi Fine-tune + # --- TAHAP 1: FREEZE BACKBONE, LATIH HEAD --- + print("\n--- TAHAP 1: Melatih Classifier Head ---") + # Bekukan semua layer kecuali head + for param in model.parameters(): + param.requires_grad = False + for param in model.head.parameters(): + param.requires_grad = True + + optimizer_head = optim.AdamW(model.head.parameters(), lr=LEARNING_RATE_HEAD, weight_decay=WEIGHT_DECAY) + + # Latih head selama beberapa epoch + for epoch in range(5): # Misal 5 epoch untuk head + print(f"Epoch Head {epoch+1}/5") + train_one_epoch(model, train_loader, optimizer_head, criterion, DEVICE) + validate_one_epoch(model, valid_loader, criterion, DEVICE) + + # --- TAHAP 2: UNFREEZE & FINE-TUNE SELURUH MODEL --- + print("\n--- TAHAP 2: Fine-tuning Seluruh Model ---") + # Cairkan semua layer + for param in model.parameters(): + param.requires_grad = True + + optimizer_finetune = optim.AdamW(model.parameters(), lr=LEARNING_RATE_FINETUNE, weight_decay=WEIGHT_DECAY) + # Gunakan scheduler untuk 
menyesuaikan learning rate + scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer_finetune, T_max=EPOCHS, eta_min=1e-7) + + # E. Loop Training Utama dengan Regularisasi & Hyperparams + history = { + 'train_loss': [], 'train_acc': [], + 'valid_loss': [], 'valid_acc': [] + } + best_valid_acc = 0.0 + + for epoch in range(EPOCHS): + print(f"Epoch {epoch+1}/{EPOCHS}") + + train_loss, train_acc = train_one_epoch(model, train_loader, optimizer_finetune, criterion, DEVICE) + valid_loss, valid_acc = validate_one_epoch(model, valid_loader, criterion, DEVICE) + + scheduler.step() # Update learning rate + + print(f" Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}") + print(f" Valid Loss: {valid_loss:.4f}, Valid Acc: {valid_acc:.4f}") + + # Simpan riwayat + history['train_loss'].append(train_loss) + history['train_acc'].append(train_acc) + history['valid_loss'].append(valid_loss) + history['valid_acc'].append(valid_acc) + + # Simpan model dengan akurasi validasi terbaik (Early stopping sederhana) + if valid_acc > best_valid_acc: + print(f"Validasi akurasi meningkat dari {best_valid_acc:.4f} ke {valid_acc:.4f}. 
Menyimpan model...") + save_model(epoch, model, optimizer_finetune, criterion, f"{OUTPUT_DIR}/{MODEL_NAME}") + best_valid_acc = valid_acc + + # Evaluasi + save_plots( + history['train_acc'], history['valid_acc'], + history['train_loss'], history['valid_loss'], + OUTPUT_DIR + ) + + print("\n--- Selesai ---") + print(f"Model terbaik disimpan di {OUTPUT_DIR}/{MODEL_NAME}") \ No newline at end of file diff --git a/.history/src/train_20250925074916.py b/.history/src/train_20250925074916.py new file mode 100644 index 0000000000000000000000000000000000000000..a6cd67961140a854a3ed7b5cba054970c2f9bfac --- /dev/null +++ b/.history/src/train_20250925074916.py @@ -0,0 +1,187 @@ +# src/train.py + +import torch +import torch.nn as nn +import torch.optim as optim +from tqdm import tqdm +import numpy as np +import seaborn as sns +from sklearn.metrics import confusion_matrix +from sklearn.metrics import classification_report + +# Impor dari file lain dalam proyek +from model import create_model +from dataset import get_dataloaders +from utils import save_model, save_plots + +# --- 1. KONFIGURASI & HYPERPARAMETERS --- +# Sebagian besar diambil dari rekomendasi metodologi Anda +DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu' +DATA_DIR = 'data' # Pastikan folder 'data' hasil prepare_dataset.py ada di sini +OUTPUT_DIR = 'outputs/models' # Folder untuk menyimpan model dan plot +IMAGE_SIZE = 224 # Ukuran input untuk ViT (bisa 224x224 atau 384x384) +BATCH_SIZE = 32 +EPOCHS = 30 # Total epoch, tapi kita akan pakai Early Stopping +LEARNING_RATE_HEAD = 1e-3 # LR untuk melatih classifier head saja +LEARNING_RATE_FINETUNE = 5e-5 # LR untuk fine-tuning seluruh model +WEIGHT_DECAY = 0.01 +MODEL_NAME = 'vit_mae_cataract_classifier.pth' +NUM_CLASSES = 4 # Sesuaikan dengan jumlah kelas katarak Anda + +# --- 2. FUNGSI UNTUK MENGATASI CLASS IMBALANCE --- +def calculate_class_weights(dataset): + """ + Menghitung bobot untuk setiap kelas berdasarkan frekuensinya (1 / sqrt(freq)). 
+ Ini adalah implementasi dari "Weighted loss" di metodologi Anda. + """ + # Hitung jumlah sampel per kelas + class_counts = np.bincount(dataset.targets) + # Hitung bobot: 1 / sqrt(frekuensi) + class_weights = 1. / np.sqrt(class_counts) + # Normalisasi bobot + class_weights = class_weights / np.sum(class_weights) + return torch.tensor(class_weights, dtype=torch.float).to(DEVICE) + +# --- 3. FUNGSI TRAINING & VALIDASI --- +def train_one_epoch(model, dataloader, optimizer, criterion, device): + model.train() + running_loss = 0.0 + correct_predictions = 0 + total_samples = 0 + + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Training"): + images, labels = images.to(device), labels.to(device) + + optimizer.zero_grad() + outputs = model(images) + loss = criterion(outputs, labels) + + loss.backward() + optimizer.step() + + running_loss += loss.item() * images.size(0) + _, preds = torch.max(outputs, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + return epoch_loss, epoch_acc + +def validate_one_epoch(model, dataloader, criterion, device): + model.eval() + running_loss = 0.0 + correct_predictions = 0 + total_samples = 0 + + all_preds = [] + all_labels = [] + + with torch.no_grad(): + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Validating"): + images, labels = images.to(device), labels.to(device) + + outputs = model(images) + loss = criterion(outputs, labels) + + running_loss += loss.item() * images.size(0) + _, preds = torch.max(outputs, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + all_preds.extend(preds.cpu().numpy()) + all_labels.extend(labels.cpu().numpy()) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + + # Cetak laporan klasifikasi (precision, 
recall, f1-score per kelas) + print("\n--- Laporan Klasifikasi Validasi ---") + print(classification_report(all_labels, all_preds, target_names=[str(i) for i in range(NUM_CLASSES)])) + + return epoch_loss, epoch_acc + +# --- 4. SCRIPT UTAMA --- +if __name__ == '__main__': + # A. Persiapan & Split Data + # Kita asumsikan ini sudah dilakukan oleh prepare_dataset.py + # get_dataloaders akan mengambil data dari folder 'data/train' dan 'data/valid' + train_loader, valid_loader, classes = get_dataloaders(DATA_DIR, BATCH_SIZE, IMAGE_SIZE) + + # B. Arsitektur & Pretraining + # Menggunakan ViT pretrained (MAE) dari model.py + model = create_model(num_classes=NUM_CLASSES).to(DEVICE) + + # C. Menangani Imbalance + # Hitung bobot kelas dari dataset training + class_weights = calculate_class_weights(train_loader.dataset) + print(f"Class weights untuk mengatasi imbalance: {class_weights}") + # Gunakan Weighted Cross-Entropy Loss + criterion = nn.CrossEntropyLoss(weight=class_weights) + + # D. Strategi Fine-tune + # --- TAHAP 1: FREEZE BACKBONE, LATIH HEAD --- + print("\n--- TAHAP 1: Melatih Classifier Head ---") + # Bekukan semua layer kecuali head + for param in model.parameters(): + param.requires_grad = False + for param in model.head.parameters(): + param.requires_grad = True + + optimizer_head = optim.AdamW(model.head.parameters(), lr=LEARNING_RATE_HEAD, weight_decay=WEIGHT_DECAY) + + # Latih head selama beberapa epoch + for epoch in range(5): # Misal 5 epoch untuk head + print(f"Epoch Head {epoch+1}/5") + train_one_epoch(model, train_loader, optimizer_head, criterion, DEVICE) + validate_one_epoch(model, valid_loader, criterion, DEVICE) + + # --- TAHAP 2: UNFREEZE & FINE-TUNE SELURUH MODEL --- + print("\n--- TAHAP 2: Fine-tuning Seluruh Model ---") + # Cairkan semua layer + for param in model.parameters(): + param.requires_grad = True + + optimizer_finetune = optim.AdamW(model.parameters(), lr=LEARNING_RATE_FINETUNE, weight_decay=WEIGHT_DECAY) + # Gunakan scheduler untuk 
menyesuaikan learning rate + scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer_finetune, T_max=EPOCHS, eta_min=1e-7) + + # E. Loop Training Utama dengan Regularisasi & Hyperparams + history = { + 'train_loss': [], 'train_acc': [], + 'valid_loss': [], 'valid_acc': [] + } + best_valid_acc = 0.0 + + for epoch in range(EPOCHS): + print(f"Epoch {epoch+1}/{EPOCHS}") + + train_loss, train_acc = train_one_epoch(model, train_loader, optimizer_finetune, criterion, DEVICE) + valid_loss, valid_acc = validate_one_epoch(model, valid_loader, criterion, DEVICE) + + scheduler.step() # Update learning rate + + print(f" Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}") + print(f" Valid Loss: {valid_loss:.4f}, Valid Acc: {valid_acc:.4f}") + + # Simpan riwayat + history['train_loss'].append(train_loss) + history['train_acc'].append(train_acc) + history['valid_loss'].append(valid_loss) + history['valid_acc'].append(valid_acc) + + # Simpan model dengan akurasi validasi terbaik (Early stopping sederhana) + if valid_acc > best_valid_acc: + print(f"Validasi akurasi meningkat dari {best_valid_acc:.4f} ke {valid_acc:.4f}. 
Menyimpan model...") + save_model(epoch, model, optimizer_finetune, criterion, f"{OUTPUT_DIR}/{MODEL_NAME}") + best_valid_acc = valid_acc + + # Evaluasi + save_plots( + history['train_acc'], history['valid_acc'], + history['train_loss'], history['valid_loss'], + OUTPUT_DIR + ) + + print("\n--- Selesai ---") + print(f"Model terbaik disimpan di {OUTPUT_DIR}/{MODEL_NAME}") \ No newline at end of file diff --git a/.history/src/train_20250925182547.py b/.history/src/train_20250925182547.py new file mode 100644 index 0000000000000000000000000000000000000000..515c607fe0ae000cf8dcdee15ba44292a40866d6 --- /dev/null +++ b/.history/src/train_20250925182547.py @@ -0,0 +1,187 @@ +# src/train.py + +import torch +import torch.nn as nn +import torch.optim as optim +from tqdm import tqdm +import numpy as np +import seaborn as sns +from sklearn.metrics import confusion_matrix +from sklearn.metrics import classification_report + +# Impor dari file lain dalam proyek +from model import create_model +from dataset import get_dataloaders +from utils import save_model, save_plots + +# --- 1. KONFIGURASI & HYPERPARAMETERS --- +# Sebagian besar diambil dari rekomendasi metodologi Anda +DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu' +DATA_DIR = 'data' # Pastikan folder 'data' hasil prepare_dataset.py ada di sini +OUTPUT_DIR = 'outputs/models' # Folder untuk menyimpan model dan plot +IMAGE_SIZE = 224 # Ukuran input untuk ViT (bisa 224x224 atau 384x384) +BATCH_SIZE = 2 +EPOCHS = 30 # Total epoch, tapi kita akan pakai Early Stopping +LEARNING_RATE_HEAD = 1e-3 # LR untuk melatih classifier head saja +LEARNING_RATE_FINETUNE = 5e-5 # LR untuk fine-tuning seluruh model +WEIGHT_DECAY = 0.01 +MODEL_NAME = 'vit_mae_cataract_classifier.pth' +NUM_CLASSES = 4 # Sesuaikan dengan jumlah kelas katarak Anda + +# --- 2. FUNGSI UNTUK MENGATASI CLASS IMBALANCE --- +def calculate_class_weights(dataset): + """ + Menghitung bobot untuk setiap kelas berdasarkan frekuensinya (1 / sqrt(freq)). 
+ Ini adalah implementasi dari "Weighted loss" di metodologi Anda. + """ + # Hitung jumlah sampel per kelas + class_counts = np.bincount(dataset.targets) + # Hitung bobot: 1 / sqrt(frekuensi) + class_weights = 1. / np.sqrt(class_counts) + # Normalisasi bobot + class_weights = class_weights / np.sum(class_weights) + return torch.tensor(class_weights, dtype=torch.float).to(DEVICE) + +# --- 3. FUNGSI TRAINING & VALIDASI --- +def train_one_epoch(model, dataloader, optimizer, criterion, device): + model.train() + running_loss = 0.0 + correct_predictions = 0 + total_samples = 0 + + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Training"): + images, labels = images.to(device), labels.to(device) + + optimizer.zero_grad() + outputs = model(images) + loss = criterion(outputs, labels) + + loss.backward() + optimizer.step() + + running_loss += loss.item() * images.size(0) + _, preds = torch.max(outputs, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + return epoch_loss, epoch_acc + +def validate_one_epoch(model, dataloader, criterion, device): + model.eval() + running_loss = 0.0 + correct_predictions = 0 + total_samples = 0 + + all_preds = [] + all_labels = [] + + with torch.no_grad(): + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Validating"): + images, labels = images.to(device), labels.to(device) + + outputs = model(images) + loss = criterion(outputs, labels) + + running_loss += loss.item() * images.size(0) + _, preds = torch.max(outputs, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + all_preds.extend(preds.cpu().numpy()) + all_labels.extend(labels.cpu().numpy()) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + + # Cetak laporan klasifikasi (precision, 
recall, f1-score per kelas) + print("\n--- Laporan Klasifikasi Validasi ---") + print(classification_report(all_labels, all_preds, target_names=[str(i) for i in range(NUM_CLASSES)])) + + return epoch_loss, epoch_acc + +# --- 4. SCRIPT UTAMA --- +if __name__ == '__main__': + # A. Persiapan & Split Data + # Kita asumsikan ini sudah dilakukan oleh prepare_dataset.py + # get_dataloaders akan mengambil data dari folder 'data/train' dan 'data/valid' + train_loader, valid_loader, classes = get_dataloaders(DATA_DIR, BATCH_SIZE, IMAGE_SIZE) + + # B. Arsitektur & Pretraining + # Menggunakan ViT pretrained (MAE) dari model.py + model = create_model(num_classes=NUM_CLASSES).to(DEVICE) + + # C. Menangani Imbalance + # Hitung bobot kelas dari dataset training + class_weights = calculate_class_weights(train_loader.dataset) + print(f"Class weights untuk mengatasi imbalance: {class_weights}") + # Gunakan Weighted Cross-Entropy Loss + criterion = nn.CrossEntropyLoss(weight=class_weights) + + # D. Strategi Fine-tune + # --- TAHAP 1: FREEZE BACKBONE, LATIH HEAD --- + print("\n--- TAHAP 1: Melatih Classifier Head ---") + # Bekukan semua layer kecuali head + for param in model.parameters(): + param.requires_grad = False + for param in model.head.parameters(): + param.requires_grad = True + + optimizer_head = optim.AdamW(model.head.parameters(), lr=LEARNING_RATE_HEAD, weight_decay=WEIGHT_DECAY) + + # Latih head selama beberapa epoch + for epoch in range(5): # Misal 5 epoch untuk head + print(f"Epoch Head {epoch+1}/5") + train_one_epoch(model, train_loader, optimizer_head, criterion, DEVICE) + validate_one_epoch(model, valid_loader, criterion, DEVICE) + + # --- TAHAP 2: UNFREEZE & FINE-TUNE SELURUH MODEL --- + print("\n--- TAHAP 2: Fine-tuning Seluruh Model ---") + # Cairkan semua layer + for param in model.parameters(): + param.requires_grad = True + + optimizer_finetune = optim.AdamW(model.parameters(), lr=LEARNING_RATE_FINETUNE, weight_decay=WEIGHT_DECAY) + # Gunakan scheduler untuk 
menyesuaikan learning rate + scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer_finetune, T_max=EPOCHS, eta_min=1e-7) + + # E. Loop Training Utama dengan Regularisasi & Hyperparams + history = { + 'train_loss': [], 'train_acc': [], + 'valid_loss': [], 'valid_acc': [] + } + best_valid_acc = 0.0 + + for epoch in range(EPOCHS): + print(f"Epoch {epoch+1}/{EPOCHS}") + + train_loss, train_acc = train_one_epoch(model, train_loader, optimizer_finetune, criterion, DEVICE) + valid_loss, valid_acc = validate_one_epoch(model, valid_loader, criterion, DEVICE) + + scheduler.step() # Update learning rate + + print(f" Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}") + print(f" Valid Loss: {valid_loss:.4f}, Valid Acc: {valid_acc:.4f}") + + # Simpan riwayat + history['train_loss'].append(train_loss) + history['train_acc'].append(train_acc) + history['valid_loss'].append(valid_loss) + history['valid_acc'].append(valid_acc) + + # Simpan model dengan akurasi validasi terbaik (Early stopping sederhana) + if valid_acc > best_valid_acc: + print(f"Validasi akurasi meningkat dari {best_valid_acc:.4f} ke {valid_acc:.4f}. 
Menyimpan model...") + save_model(epoch, model, optimizer_finetune, criterion, f"{OUTPUT_DIR}/{MODEL_NAME}") + best_valid_acc = valid_acc + + # Evaluasi + save_plots( + history['train_acc'], history['valid_acc'], + history['train_loss'], history['valid_loss'], + OUTPUT_DIR + ) + + print("\n--- Selesai ---") + print(f"Model terbaik disimpan di {OUTPUT_DIR}/{MODEL_NAME}") \ No newline at end of file diff --git a/.history/src/train_20250925182548.py b/.history/src/train_20250925182548.py new file mode 100644 index 0000000000000000000000000000000000000000..4bcdad62813805ffe874d2e907f6c0ee37c8222c --- /dev/null +++ b/.history/src/train_20250925182548.py @@ -0,0 +1,187 @@ +# src/train.py + +import torch +import torch.nn as nn +import torch.optim as optim +from tqdm import tqdm +import numpy as np +import seaborn as sns +from sklearn.metrics import confusion_matrix +from sklearn.metrics import classification_report + +# Impor dari file lain dalam proyek +from model import create_model +from dataset import get_dataloaders +from utils import save_model, save_plots + +# --- 1. KONFIGURASI & HYPERPARAMETERS --- +# Sebagian besar diambil dari rekomendasi metodologi Anda +DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu' +DATA_DIR = 'data' # Pastikan folder 'data' hasil prepare_dataset.py ada di sini +OUTPUT_DIR = 'outputs/models' # Folder untuk menyimpan model dan plot +IMAGE_SIZE = 224 # Ukuran input untuk ViT (bisa 224x224 atau 384x384) +BATCH_SIZE = 22 +EPOCHS = 30 # Total epoch, tapi kita akan pakai Early Stopping +LEARNING_RATE_HEAD = 1e-3 # LR untuk melatih classifier head saja +LEARNING_RATE_FINETUNE = 5e-5 # LR untuk fine-tuning seluruh model +WEIGHT_DECAY = 0.01 +MODEL_NAME = 'vit_mae_cataract_classifier.pth' +NUM_CLASSES = 4 # Sesuaikan dengan jumlah kelas katarak Anda + +# --- 2. FUNGSI UNTUK MENGATASI CLASS IMBALANCE --- +def calculate_class_weights(dataset): + """ + Menghitung bobot untuk setiap kelas berdasarkan frekuensinya (1 / sqrt(freq)). 
+ Ini adalah implementasi dari "Weighted loss" di metodologi Anda. + """ + # Hitung jumlah sampel per kelas + class_counts = np.bincount(dataset.targets) + # Hitung bobot: 1 / sqrt(frekuensi) + class_weights = 1. / np.sqrt(class_counts) + # Normalisasi bobot + class_weights = class_weights / np.sum(class_weights) + return torch.tensor(class_weights, dtype=torch.float).to(DEVICE) + +# --- 3. FUNGSI TRAINING & VALIDASI --- +def train_one_epoch(model, dataloader, optimizer, criterion, device): + model.train() + running_loss = 0.0 + correct_predictions = 0 + total_samples = 0 + + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Training"): + images, labels = images.to(device), labels.to(device) + + optimizer.zero_grad() + outputs = model(images) + loss = criterion(outputs, labels) + + loss.backward() + optimizer.step() + + running_loss += loss.item() * images.size(0) + _, preds = torch.max(outputs, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + return epoch_loss, epoch_acc + +def validate_one_epoch(model, dataloader, criterion, device): + model.eval() + running_loss = 0.0 + correct_predictions = 0 + total_samples = 0 + + all_preds = [] + all_labels = [] + + with torch.no_grad(): + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Validating"): + images, labels = images.to(device), labels.to(device) + + outputs = model(images) + loss = criterion(outputs, labels) + + running_loss += loss.item() * images.size(0) + _, preds = torch.max(outputs, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + all_preds.extend(preds.cpu().numpy()) + all_labels.extend(labels.cpu().numpy()) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + + # Cetak laporan klasifikasi (precision, 
recall, f1-score per kelas) + print("\n--- Laporan Klasifikasi Validasi ---") + print(classification_report(all_labels, all_preds, target_names=[str(i) for i in range(NUM_CLASSES)])) + + return epoch_loss, epoch_acc + +# --- 4. SCRIPT UTAMA --- +if __name__ == '__main__': + # A. Persiapan & Split Data + # Kita asumsikan ini sudah dilakukan oleh prepare_dataset.py + # get_dataloaders akan mengambil data dari folder 'data/train' dan 'data/valid' + train_loader, valid_loader, classes = get_dataloaders(DATA_DIR, BATCH_SIZE, IMAGE_SIZE) + + # B. Arsitektur & Pretraining + # Menggunakan ViT pretrained (MAE) dari model.py + model = create_model(num_classes=NUM_CLASSES).to(DEVICE) + + # C. Menangani Imbalance + # Hitung bobot kelas dari dataset training + class_weights = calculate_class_weights(train_loader.dataset) + print(f"Class weights untuk mengatasi imbalance: {class_weights}") + # Gunakan Weighted Cross-Entropy Loss + criterion = nn.CrossEntropyLoss(weight=class_weights) + + # D. Strategi Fine-tune + # --- TAHAP 1: FREEZE BACKBONE, LATIH HEAD --- + print("\n--- TAHAP 1: Melatih Classifier Head ---") + # Bekukan semua layer kecuali head + for param in model.parameters(): + param.requires_grad = False + for param in model.head.parameters(): + param.requires_grad = True + + optimizer_head = optim.AdamW(model.head.parameters(), lr=LEARNING_RATE_HEAD, weight_decay=WEIGHT_DECAY) + + # Latih head selama beberapa epoch + for epoch in range(5): # Misal 5 epoch untuk head + print(f"Epoch Head {epoch+1}/5") + train_one_epoch(model, train_loader, optimizer_head, criterion, DEVICE) + validate_one_epoch(model, valid_loader, criterion, DEVICE) + + # --- TAHAP 2: UNFREEZE & FINE-TUNE SELURUH MODEL --- + print("\n--- TAHAP 2: Fine-tuning Seluruh Model ---") + # Cairkan semua layer + for param in model.parameters(): + param.requires_grad = True + + optimizer_finetune = optim.AdamW(model.parameters(), lr=LEARNING_RATE_FINETUNE, weight_decay=WEIGHT_DECAY) + # Gunakan scheduler untuk 
menyesuaikan learning rate + scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer_finetune, T_max=EPOCHS, eta_min=1e-7) + + # E. Loop Training Utama dengan Regularisasi & Hyperparams + history = { + 'train_loss': [], 'train_acc': [], + 'valid_loss': [], 'valid_acc': [] + } + best_valid_acc = 0.0 + + for epoch in range(EPOCHS): + print(f"Epoch {epoch+1}/{EPOCHS}") + + train_loss, train_acc = train_one_epoch(model, train_loader, optimizer_finetune, criterion, DEVICE) + valid_loss, valid_acc = validate_one_epoch(model, valid_loader, criterion, DEVICE) + + scheduler.step() # Update learning rate + + print(f" Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}") + print(f" Valid Loss: {valid_loss:.4f}, Valid Acc: {valid_acc:.4f}") + + # Simpan riwayat + history['train_loss'].append(train_loss) + history['train_acc'].append(train_acc) + history['valid_loss'].append(valid_loss) + history['valid_acc'].append(valid_acc) + + # Simpan model dengan akurasi validasi terbaik (Early stopping sederhana) + if valid_acc > best_valid_acc: + print(f"Validasi akurasi meningkat dari {best_valid_acc:.4f} ke {valid_acc:.4f}. 
Menyimpan model...") + save_model(epoch, model, optimizer_finetune, criterion, f"{OUTPUT_DIR}/{MODEL_NAME}") + best_valid_acc = valid_acc + + # Evaluasi + save_plots( + history['train_acc'], history['valid_acc'], + history['train_loss'], history['valid_loss'], + OUTPUT_DIR + ) + + print("\n--- Selesai ---") + print(f"Model terbaik disimpan di {OUTPUT_DIR}/{MODEL_NAME}") \ No newline at end of file diff --git a/.history/src/train_20250925182550.py b/.history/src/train_20250925182550.py new file mode 100644 index 0000000000000000000000000000000000000000..f8fa7b7ac7fdfcbc959fc30f6096d3d18c13eccf --- /dev/null +++ b/.history/src/train_20250925182550.py @@ -0,0 +1,187 @@ +# src/train.py + +import torch +import torch.nn as nn +import torch.optim as optim +from tqdm import tqdm +import numpy as np +import seaborn as sns +from sklearn.metrics import confusion_matrix +from sklearn.metrics import classification_report + +# Impor dari file lain dalam proyek +from model import create_model +from dataset import get_dataloaders +from utils import save_model, save_plots + +# --- 1. KONFIGURASI & HYPERPARAMETERS --- +# Sebagian besar diambil dari rekomendasi metodologi Anda +DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu' +DATA_DIR = 'data' # Pastikan folder 'data' hasil prepare_dataset.py ada di sini +OUTPUT_DIR = 'outputs/models' # Folder untuk menyimpan model dan plot +IMAGE_SIZE = 224 # Ukuran input untuk ViT (bisa 224x224 atau 384x384) +BATCH_SIZE = 12 +EPOCHS = 30 # Total epoch, tapi kita akan pakai Early Stopping +LEARNING_RATE_HEAD = 1e-3 # LR untuk melatih classifier head saja +LEARNING_RATE_FINETUNE = 5e-5 # LR untuk fine-tuning seluruh model +WEIGHT_DECAY = 0.01 +MODEL_NAME = 'vit_mae_cataract_classifier.pth' +NUM_CLASSES = 4 # Sesuaikan dengan jumlah kelas katarak Anda + +# --- 2. FUNGSI UNTUK MENGATASI CLASS IMBALANCE --- +def calculate_class_weights(dataset): + """ + Menghitung bobot untuk setiap kelas berdasarkan frekuensinya (1 / sqrt(freq)). 
+ Ini adalah implementasi dari "Weighted loss" di metodologi Anda. + """ + # Hitung jumlah sampel per kelas + class_counts = np.bincount(dataset.targets) + # Hitung bobot: 1 / sqrt(frekuensi) + class_weights = 1. / np.sqrt(class_counts) + # Normalisasi bobot + class_weights = class_weights / np.sum(class_weights) + return torch.tensor(class_weights, dtype=torch.float).to(DEVICE) + +# --- 3. FUNGSI TRAINING & VALIDASI --- +def train_one_epoch(model, dataloader, optimizer, criterion, device): + model.train() + running_loss = 0.0 + correct_predictions = 0 + total_samples = 0 + + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Training"): + images, labels = images.to(device), labels.to(device) + + optimizer.zero_grad() + outputs = model(images) + loss = criterion(outputs, labels) + + loss.backward() + optimizer.step() + + running_loss += loss.item() * images.size(0) + _, preds = torch.max(outputs, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + return epoch_loss, epoch_acc + +def validate_one_epoch(model, dataloader, criterion, device): + model.eval() + running_loss = 0.0 + correct_predictions = 0 + total_samples = 0 + + all_preds = [] + all_labels = [] + + with torch.no_grad(): + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Validating"): + images, labels = images.to(device), labels.to(device) + + outputs = model(images) + loss = criterion(outputs, labels) + + running_loss += loss.item() * images.size(0) + _, preds = torch.max(outputs, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + all_preds.extend(preds.cpu().numpy()) + all_labels.extend(labels.cpu().numpy()) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + + # Cetak laporan klasifikasi (precision, 
recall, f1-score per kelas) + print("\n--- Laporan Klasifikasi Validasi ---") + print(classification_report(all_labels, all_preds, target_names=[str(i) for i in range(NUM_CLASSES)])) + + return epoch_loss, epoch_acc + +# --- 4. SCRIPT UTAMA --- +if __name__ == '__main__': + # A. Persiapan & Split Data + # Kita asumsikan ini sudah dilakukan oleh prepare_dataset.py + # get_dataloaders akan mengambil data dari folder 'data/train' dan 'data/valid' + train_loader, valid_loader, classes = get_dataloaders(DATA_DIR, BATCH_SIZE, IMAGE_SIZE) + + # B. Arsitektur & Pretraining + # Menggunakan ViT pretrained (MAE) dari model.py + model = create_model(num_classes=NUM_CLASSES).to(DEVICE) + + # C. Menangani Imbalance + # Hitung bobot kelas dari dataset training + class_weights = calculate_class_weights(train_loader.dataset) + print(f"Class weights untuk mengatasi imbalance: {class_weights}") + # Gunakan Weighted Cross-Entropy Loss + criterion = nn.CrossEntropyLoss(weight=class_weights) + + # D. Strategi Fine-tune + # --- TAHAP 1: FREEZE BACKBONE, LATIH HEAD --- + print("\n--- TAHAP 1: Melatih Classifier Head ---") + # Bekukan semua layer kecuali head + for param in model.parameters(): + param.requires_grad = False + for param in model.head.parameters(): + param.requires_grad = True + + optimizer_head = optim.AdamW(model.head.parameters(), lr=LEARNING_RATE_HEAD, weight_decay=WEIGHT_DECAY) + + # Latih head selama beberapa epoch + for epoch in range(5): # Misal 5 epoch untuk head + print(f"Epoch Head {epoch+1}/5") + train_one_epoch(model, train_loader, optimizer_head, criterion, DEVICE) + validate_one_epoch(model, valid_loader, criterion, DEVICE) + + # --- TAHAP 2: UNFREEZE & FINE-TUNE SELURUH MODEL --- + print("\n--- TAHAP 2: Fine-tuning Seluruh Model ---") + # Cairkan semua layer + for param in model.parameters(): + param.requires_grad = True + + optimizer_finetune = optim.AdamW(model.parameters(), lr=LEARNING_RATE_FINETUNE, weight_decay=WEIGHT_DECAY) + # Gunakan scheduler untuk 
menyesuaikan learning rate + scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer_finetune, T_max=EPOCHS, eta_min=1e-7) + + # E. Loop Training Utama dengan Regularisasi & Hyperparams + history = { + 'train_loss': [], 'train_acc': [], + 'valid_loss': [], 'valid_acc': [] + } + best_valid_acc = 0.0 + + for epoch in range(EPOCHS): + print(f"Epoch {epoch+1}/{EPOCHS}") + + train_loss, train_acc = train_one_epoch(model, train_loader, optimizer_finetune, criterion, DEVICE) + valid_loss, valid_acc = validate_one_epoch(model, valid_loader, criterion, DEVICE) + + scheduler.step() # Update learning rate + + print(f" Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}") + print(f" Valid Loss: {valid_loss:.4f}, Valid Acc: {valid_acc:.4f}") + + # Simpan riwayat + history['train_loss'].append(train_loss) + history['train_acc'].append(train_acc) + history['valid_loss'].append(valid_loss) + history['valid_acc'].append(valid_acc) + + # Simpan model dengan akurasi validasi terbaik (Early stopping sederhana) + if valid_acc > best_valid_acc: + print(f"Validasi akurasi meningkat dari {best_valid_acc:.4f} ke {valid_acc:.4f}. 
Menyimpan model...") + save_model(epoch, model, optimizer_finetune, criterion, f"{OUTPUT_DIR}/{MODEL_NAME}") + best_valid_acc = valid_acc + + # Evaluasi + save_plots( + history['train_acc'], history['valid_acc'], + history['train_loss'], history['valid_loss'], + OUTPUT_DIR + ) + + print("\n--- Selesai ---") + print(f"Model terbaik disimpan di {OUTPUT_DIR}/{MODEL_NAME}") \ No newline at end of file diff --git a/.history/src/train_20250925182552.py b/.history/src/train_20250925182552.py new file mode 100644 index 0000000000000000000000000000000000000000..464bd42bb47d247172f1073102330fc17064f028 --- /dev/null +++ b/.history/src/train_20250925182552.py @@ -0,0 +1,187 @@ +# src/train.py + +import torch +import torch.nn as nn +import torch.optim as optim +from tqdm import tqdm +import numpy as np +import seaborn as sns +from sklearn.metrics import confusion_matrix +from sklearn.metrics import classification_report + +# Impor dari file lain dalam proyek +from model import create_model +from dataset import get_dataloaders +from utils import save_model, save_plots + +# --- 1. KONFIGURASI & HYPERPARAMETERS --- +# Sebagian besar diambil dari rekomendasi metodologi Anda +DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu' +DATA_DIR = 'data' # Pastikan folder 'data' hasil prepare_dataset.py ada di sini +OUTPUT_DIR = 'outputs/models' # Folder untuk menyimpan model dan plot +IMAGE_SIZE = 224 # Ukuran input untuk ViT (bisa 224x224 atau 384x384) +BATCH_SIZE = 16 +EPOCHS = 30 # Total epoch, tapi kita akan pakai Early Stopping +LEARNING_RATE_HEAD = 1e-3 # LR untuk melatih classifier head saja +LEARNING_RATE_FINETUNE = 5e-5 # LR untuk fine-tuning seluruh model +WEIGHT_DECAY = 0.01 +MODEL_NAME = 'vit_mae_cataract_classifier.pth' +NUM_CLASSES = 4 # Sesuaikan dengan jumlah kelas katarak Anda + +# --- 2. FUNGSI UNTUK MENGATASI CLASS IMBALANCE --- +def calculate_class_weights(dataset): + """ + Menghitung bobot untuk setiap kelas berdasarkan frekuensinya (1 / sqrt(freq)). 
+ Ini adalah implementasi dari "Weighted loss" di metodologi Anda. + """ + # Hitung jumlah sampel per kelas + class_counts = np.bincount(dataset.targets) + # Hitung bobot: 1 / sqrt(frekuensi) + class_weights = 1. / np.sqrt(class_counts) + # Normalisasi bobot + class_weights = class_weights / np.sum(class_weights) + return torch.tensor(class_weights, dtype=torch.float).to(DEVICE) + +# --- 3. FUNGSI TRAINING & VALIDASI --- +def train_one_epoch(model, dataloader, optimizer, criterion, device): + model.train() + running_loss = 0.0 + correct_predictions = 0 + total_samples = 0 + + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Training"): + images, labels = images.to(device), labels.to(device) + + optimizer.zero_grad() + outputs = model(images) + loss = criterion(outputs, labels) + + loss.backward() + optimizer.step() + + running_loss += loss.item() * images.size(0) + _, preds = torch.max(outputs, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + return epoch_loss, epoch_acc + +def validate_one_epoch(model, dataloader, criterion, device): + model.eval() + running_loss = 0.0 + correct_predictions = 0 + total_samples = 0 + + all_preds = [] + all_labels = [] + + with torch.no_grad(): + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Validating"): + images, labels = images.to(device), labels.to(device) + + outputs = model(images) + loss = criterion(outputs, labels) + + running_loss += loss.item() * images.size(0) + _, preds = torch.max(outputs, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + all_preds.extend(preds.cpu().numpy()) + all_labels.extend(labels.cpu().numpy()) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + + # Cetak laporan klasifikasi (precision, 
recall, f1-score per kelas) + print("\n--- Laporan Klasifikasi Validasi ---") + print(classification_report(all_labels, all_preds, target_names=[str(i) for i in range(NUM_CLASSES)])) + + return epoch_loss, epoch_acc + +# --- 4. SCRIPT UTAMA --- +if __name__ == '__main__': + # A. Persiapan & Split Data + # Kita asumsikan ini sudah dilakukan oleh prepare_dataset.py + # get_dataloaders akan mengambil data dari folder 'data/train' dan 'data/valid' + train_loader, valid_loader, classes = get_dataloaders(DATA_DIR, BATCH_SIZE, IMAGE_SIZE) + + # B. Arsitektur & Pretraining + # Menggunakan ViT pretrained (MAE) dari model.py + model = create_model(num_classes=NUM_CLASSES).to(DEVICE) + + # C. Menangani Imbalance + # Hitung bobot kelas dari dataset training + class_weights = calculate_class_weights(train_loader.dataset) + print(f"Class weights untuk mengatasi imbalance: {class_weights}") + # Gunakan Weighted Cross-Entropy Loss + criterion = nn.CrossEntropyLoss(weight=class_weights) + + # D. Strategi Fine-tune + # --- TAHAP 1: FREEZE BACKBONE, LATIH HEAD --- + print("\n--- TAHAP 1: Melatih Classifier Head ---") + # Bekukan semua layer kecuali head + for param in model.parameters(): + param.requires_grad = False + for param in model.head.parameters(): + param.requires_grad = True + + optimizer_head = optim.AdamW(model.head.parameters(), lr=LEARNING_RATE_HEAD, weight_decay=WEIGHT_DECAY) + + # Latih head selama beberapa epoch + for epoch in range(5): # Misal 5 epoch untuk head + print(f"Epoch Head {epoch+1}/5") + train_one_epoch(model, train_loader, optimizer_head, criterion, DEVICE) + validate_one_epoch(model, valid_loader, criterion, DEVICE) + + # --- TAHAP 2: UNFREEZE & FINE-TUNE SELURUH MODEL --- + print("\n--- TAHAP 2: Fine-tuning Seluruh Model ---") + # Cairkan semua layer + for param in model.parameters(): + param.requires_grad = True + + optimizer_finetune = optim.AdamW(model.parameters(), lr=LEARNING_RATE_FINETUNE, weight_decay=WEIGHT_DECAY) + # Gunakan scheduler untuk 
menyesuaikan learning rate + scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer_finetune, T_max=EPOCHS, eta_min=1e-7) + + # E. Loop Training Utama dengan Regularisasi & Hyperparams + history = { + 'train_loss': [], 'train_acc': [], + 'valid_loss': [], 'valid_acc': [] + } + best_valid_acc = 0.0 + + for epoch in range(EPOCHS): + print(f"Epoch {epoch+1}/{EPOCHS}") + + train_loss, train_acc = train_one_epoch(model, train_loader, optimizer_finetune, criterion, DEVICE) + valid_loss, valid_acc = validate_one_epoch(model, valid_loader, criterion, DEVICE) + + scheduler.step() # Update learning rate + + print(f" Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}") + print(f" Valid Loss: {valid_loss:.4f}, Valid Acc: {valid_acc:.4f}") + + # Simpan riwayat + history['train_loss'].append(train_loss) + history['train_acc'].append(train_acc) + history['valid_loss'].append(valid_loss) + history['valid_acc'].append(valid_acc) + + # Simpan model dengan akurasi validasi terbaik (Early stopping sederhana) + if valid_acc > best_valid_acc: + print(f"Validasi akurasi meningkat dari {best_valid_acc:.4f} ke {valid_acc:.4f}. 
Menyimpan model...") + save_model(epoch, model, optimizer_finetune, criterion, f"{OUTPUT_DIR}/{MODEL_NAME}") + best_valid_acc = valid_acc + + # Evaluasi + save_plots( + history['train_acc'], history['valid_acc'], + history['train_loss'], history['valid_loss'], + OUTPUT_DIR + ) + + print("\n--- Selesai ---") + print(f"Model terbaik disimpan di {OUTPUT_DIR}/{MODEL_NAME}") \ No newline at end of file diff --git a/.history/src/train_20250925183530.py b/.history/src/train_20250925183530.py new file mode 100644 index 0000000000000000000000000000000000000000..c557a7d487d158e5f3024c6d9bb2710b86e4b949 --- /dev/null +++ b/.history/src/train_20250925183530.py @@ -0,0 +1,187 @@ +# src/train.py + +import torch +import torch.nn as nn +import torch.optim as optim +from tqdm import tqdm +import numpy as np +import seaborn as sns +from sklearn.metrics import confusion_matrix +from sklearn.metrics import classification_report + +# Impor dari file lain dalam proyek +from model import create_model +from dataset import get_dataloaders +from utils import save_model, save_plots + +# --- 1. KONFIGURASI & HYPERPARAMETERS --- +# Sebagian besar diambil dari rekomendasi metodologi Anda +DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu' +DATA_DIR = 'data' # Pastikan folder 'data' hasil prepare_dataset.py ada di sini +OUTPUT_DIR = 'outputs/models' # Folder untuk menyimpan model dan plot +IMAGE_SIZE = 224 # Ukuran input untuk ViT (bisa 224x224 atau 384x384) +BATCH_SIZE = 16 +EPOCHS = 50 # Total epoch, tapi kita akan pakai Early Stopping +LEARNING_RATE_HEAD = 1e-3 # LR untuk melatih classifier head saja +LEARNING_RATE_FINETUNE = 5e-5 # LR untuk fine-tuning seluruh model +WEIGHT_DECAY = 0.01 +MODEL_NAME = 'vit_mae_cataract_classifier.pth' +NUM_CLASSES = 4 # Sesuaikan dengan jumlah kelas katarak Anda + +# --- 2. FUNGSI UNTUK MENGATASI CLASS IMBALANCE --- +def calculate_class_weights(dataset): + """ + Menghitung bobot untuk setiap kelas berdasarkan frekuensinya (1 / sqrt(freq)). 
+ Ini adalah implementasi dari "Weighted loss" di metodologi Anda. + """ + # Hitung jumlah sampel per kelas + class_counts = np.bincount(dataset.targets) + # Hitung bobot: 1 / sqrt(frekuensi) + class_weights = 1. / np.sqrt(class_counts) + # Normalisasi bobot + class_weights = class_weights / np.sum(class_weights) + return torch.tensor(class_weights, dtype=torch.float).to(DEVICE) + +# --- 3. FUNGSI TRAINING & VALIDASI --- +def train_one_epoch(model, dataloader, optimizer, criterion, device): + model.train() + running_loss = 0.0 + correct_predictions = 0 + total_samples = 0 + + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Training"): + images, labels = images.to(device), labels.to(device) + + optimizer.zero_grad() + outputs = model(images) + loss = criterion(outputs, labels) + + loss.backward() + optimizer.step() + + running_loss += loss.item() * images.size(0) + _, preds = torch.max(outputs, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + return epoch_loss, epoch_acc + +def validate_one_epoch(model, dataloader, criterion, device): + model.eval() + running_loss = 0.0 + correct_predictions = 0 + total_samples = 0 + + all_preds = [] + all_labels = [] + + with torch.no_grad(): + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Validating"): + images, labels = images.to(device), labels.to(device) + + outputs = model(images) + loss = criterion(outputs, labels) + + running_loss += loss.item() * images.size(0) + _, preds = torch.max(outputs, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + all_preds.extend(preds.cpu().numpy()) + all_labels.extend(labels.cpu().numpy()) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + + # Cetak laporan klasifikasi (precision, 
recall, f1-score per kelas) + print("\n--- Laporan Klasifikasi Validasi ---") + print(classification_report(all_labels, all_preds, target_names=[str(i) for i in range(NUM_CLASSES)])) + + return epoch_loss, epoch_acc + +# --- 4. SCRIPT UTAMA --- +if __name__ == '__main__': + # A. Persiapan & Split Data + # Kita asumsikan ini sudah dilakukan oleh prepare_dataset.py + # get_dataloaders akan mengambil data dari folder 'data/train' dan 'data/valid' + train_loader, valid_loader, classes = get_dataloaders(DATA_DIR, BATCH_SIZE, IMAGE_SIZE) + + # B. Arsitektur & Pretraining + # Menggunakan ViT pretrained (MAE) dari model.py + model = create_model(num_classes=NUM_CLASSES).to(DEVICE) + + # C. Menangani Imbalance + # Hitung bobot kelas dari dataset training + class_weights = calculate_class_weights(train_loader.dataset) + print(f"Class weights untuk mengatasi imbalance: {class_weights}") + # Gunakan Weighted Cross-Entropy Loss + criterion = nn.CrossEntropyLoss(weight=class_weights) + + # D. Strategi Fine-tune + # --- TAHAP 1: FREEZE BACKBONE, LATIH HEAD --- + print("\n--- TAHAP 1: Melatih Classifier Head ---") + # Bekukan semua layer kecuali head + for param in model.parameters(): + param.requires_grad = False + for param in model.head.parameters(): + param.requires_grad = True + + optimizer_head = optim.AdamW(model.head.parameters(), lr=LEARNING_RATE_HEAD, weight_decay=WEIGHT_DECAY) + + # Latih head selama beberapa epoch + for epoch in range(5): # Misal 5 epoch untuk head + print(f"Epoch Head {epoch+1}/5") + train_one_epoch(model, train_loader, optimizer_head, criterion, DEVICE) + validate_one_epoch(model, valid_loader, criterion, DEVICE) + + # --- TAHAP 2: UNFREEZE & FINE-TUNE SELURUH MODEL --- + print("\n--- TAHAP 2: Fine-tuning Seluruh Model ---") + # Cairkan semua layer + for param in model.parameters(): + param.requires_grad = True + + optimizer_finetune = optim.AdamW(model.parameters(), lr=LEARNING_RATE_FINETUNE, weight_decay=WEIGHT_DECAY) + # Gunakan scheduler untuk 
menyesuaikan learning rate + scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer_finetune, T_max=EPOCHS, eta_min=1e-7) + + # E. Loop Training Utama dengan Regularisasi & Hyperparams + history = { + 'train_loss': [], 'train_acc': [], + 'valid_loss': [], 'valid_acc': [] + } + best_valid_acc = 0.0 + + for epoch in range(EPOCHS): + print(f"Epoch {epoch+1}/{EPOCHS}") + + train_loss, train_acc = train_one_epoch(model, train_loader, optimizer_finetune, criterion, DEVICE) + valid_loss, valid_acc = validate_one_epoch(model, valid_loader, criterion, DEVICE) + + scheduler.step() # Update learning rate + + print(f" Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}") + print(f" Valid Loss: {valid_loss:.4f}, Valid Acc: {valid_acc:.4f}") + + # Simpan riwayat + history['train_loss'].append(train_loss) + history['train_acc'].append(train_acc) + history['valid_loss'].append(valid_loss) + history['valid_acc'].append(valid_acc) + + # Simpan model dengan akurasi validasi terbaik (Early stopping sederhana) + if valid_acc > best_valid_acc: + print(f"Validasi akurasi meningkat dari {best_valid_acc:.4f} ke {valid_acc:.4f}. 
Menyimpan model...") + save_model(epoch, model, optimizer_finetune, criterion, f"{OUTPUT_DIR}/{MODEL_NAME}") + best_valid_acc = valid_acc + + # Evaluasi + save_plots( + history['train_acc'], history['valid_acc'], + history['train_loss'], history['valid_loss'], + OUTPUT_DIR + ) + + print("\n--- Selesai ---") + print(f"Model terbaik disimpan di {OUTPUT_DIR}/{MODEL_NAME}") \ No newline at end of file diff --git a/.history/src/train_20250926073823.py b/.history/src/train_20250926073823.py new file mode 100644 index 0000000000000000000000000000000000000000..c81b460ab9b4180e350d594e06bfb332f4f7b23c --- /dev/null +++ b/.history/src/train_20250926073823.py @@ -0,0 +1,171 @@ +# src/train.py (Versi dengan Automatic Mixed Precision) + +import torch +import torch.nn as nn +import torch.optim as optim +from tqdm import tqdm +import numpy as np +from sklearn.metrics import classification_report + +# Impor dari file lain dalam proyek +from model import create_model +from dataset import get_dataloaders +from utils import save_model, save_plots, save_confusion_matrix + +# --- 1. KONFIGURASI & HYPERPARAMETERS --- +DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu' +DATA_DIR = '../data' +OUTPUT_DIR = '../outputs' +IMAGE_SIZE = 224 +BATCH_SIZE = 16 +EPOCHS = 100 +LEARNING_RATE_HEAD = 1e-3 +LEARNING_RATE_FINETUNE = 3e-5 +WEIGHT_DECAY = 0.01 +MODEL_NAME = 'best_vit_model_amp.pth' +NUM_CLASSES = 4 + +# --- 2. FUNGSI UNTUK MENGATASI CLASS IMBALANCE --- +def calculate_class_weights(dataset): + class_counts = np.bincount(dataset.targets) + class_counts = class_counts[class_counts > 0] # Hanya gunakan kelas yang ada + class_weights = 1. / np.sqrt(class_counts) + return torch.tensor(class_weights, dtype=torch.float).to(DEVICE) + +# --- 3. 
FUNGSI TRAINING & VALIDASI --- +def train_one_epoch(model, dataloader, optimizer, criterion, device, scaler): + model.train() + running_loss = 0.0 + correct_predictions = 0 + total_samples = 0 + + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Training"): + images, labels = images.to(device), labels.to(device) + optimizer.zero_grad() + + # Gunakan autocast HANYA jika di CUDA + if device == 'cuda': + with torch.cuda.amp.autocast(): + outputs = model(images) + loss = criterion(outputs, labels) + + scaler.scale(loss).backward() + scaler.step(optimizer) + scaler.update() + else: # Jika di CPU (atau AMD GPU), jalankan mode normal + outputs = model(images) + loss = criterion(outputs, labels) + loss.backward() + optimizer.step() + + running_loss += loss.item() * images.size(0) + _, preds = torch.max(outputs, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + return epoch_loss, epoch_acc + +def validate_one_epoch(model, dataloader, criterion, device): + model.eval() + running_loss = 0.0 + correct_predictions = 0 + total_samples = 0 + all_preds, all_labels = [], [] + + with torch.no_grad(): + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Validating"): + images, labels = images.to(device), labels.to(device) + outputs = model(images) + loss = criterion(outputs, labels) + + running_loss += loss.item() * images.size(0) + _, preds = torch.max(outputs, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + all_preds.extend(preds.cpu().numpy()) + all_labels.extend(labels.cpu().numpy()) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + + print("\n--- Laporan Klasifikasi Validasi ---") + print(classification_report(all_labels, all_preds, target_names=[str(i) for i in 
range(NUM_CLASSES)], zero_division=0)) + + return epoch_loss, epoch_acc, all_labels, all_preds + +# --- 4. SCRIPT UTAMA --- +if __name__ == '__main__': + train_loader, valid_loader, classes = get_dataloaders(DATA_DIR, BATCH_SIZE, IMAGE_SIZE) + + model = create_model(num_classes=NUM_CLASSES).to(DEVICE) + + class_weights = calculate_class_weights(train_loader.dataset) + print(f"Class weights untuk mengatasi imbalance: {class_weights}") + criterion = nn.CrossEntropyLoss(weight=class_weights) + + # Inisialisasi GradScaler HANYA jika di CUDA + scaler = torch.cuda.amp.GradScaler() if DEVICE == 'cuda' else None + if scaler: + print("Mixed Precision (AMP) diaktifkan.") + + # --- TAHAP 1: FREEZE BACKBONE, LATIH HEAD --- + print("\n--- TAHAP 1: Melatih Classifier Head ---") + for param in model.parameters(): + param.requires_grad = False + for param in model.head.parameters(): + param.requires_grad = True + + optimizer_head = optim.AdamW(model.head.parameters(), lr=LEARNING_RATE_HEAD, weight_decay=WEIGHT_DECAY) + + for epoch in range(5): + print(f"Epoch Head {epoch+1}/5") + # Saat training, oper scaler. Fungsi akan menanganinya. 
+ train_one_epoch(model, train_loader, optimizer_head, criterion, DEVICE, scaler) + validate_one_epoch(model, valid_loader, criterion, DEVICE) + + # --- TAHAP 2: UNFREEZE & FINE-TUNE SELURUH MODEL --- + print("\n--- TAHAP 2: Fine-tuning Seluruh Model ---") + for param in model.parameters(): + param.requires_grad = True + + optimizer_finetune = optim.AdamW(model.parameters(), lr=LEARNING_RATE_FINETUNE, weight_decay=WEIGHT_DECAY) + scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer_finetune, T_max=EPOCHS, eta_min=1e-7) + + history = {'train_loss': [], 'train_acc': [], 'valid_loss': [], 'valid_acc': []} + best_valid_acc = 0.0 + best_labels, best_preds = None, None + + for epoch in range(EPOCHS): + print(f"Epoch {epoch+1}/{EPOCHS}") + + train_loss, train_acc = train_one_epoch(model, train_loader, optimizer_finetune, criterion, DEVICE, scaler) + valid_loss, valid_acc, valid_labels, valid_preds = validate_one_epoch(model, valid_loader, criterion, DEVICE) + + scheduler.step() + + print(f" Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}") + print(f" Valid Loss: {valid_loss:.4f}, Valid Acc: {valid_acc:.4f}") + + history['train_loss'].append(train_loss) + history['train_acc'].append(train_acc) + history['valid_loss'].append(valid_loss) + history['valid_acc'].append(valid_acc) + + if valid_acc > best_valid_acc: + print(f"Validasi akurasi meningkat dari {best_valid_acc:.4f} ke {valid_acc:.4f}. Menyimpan model...") + save_model(epoch, model, optimizer_finetune, criterion, f"{OUTPUT_DIR}/{MODEL_NAME}") + best_valid_acc = valid_acc + best_labels = valid_labels + best_preds = valid_preds + + # --- F. 
EVALUASI & REPORTING --- + save_plots(history['train_acc'], history['valid_acc'], history['train_loss'], history['valid_loss'], OUTPUT_DIR) + + if best_labels and best_preds: + save_confusion_matrix(best_labels, best_preds, classes, f"{OUTPUT_DIR}/confusion_matrix.png") + + print("\n--- Selesai ---") + print(f"Model terbaik disimpan di {OUTPUT_DIR}/{MODEL_NAME}") \ No newline at end of file diff --git a/.history/src/train_20250926073833.py b/.history/src/train_20250926073833.py new file mode 100644 index 0000000000000000000000000000000000000000..af0bc6aff2bfb67b5980d534b8bcead899e0643a --- /dev/null +++ b/.history/src/train_20250926073833.py @@ -0,0 +1,171 @@ +# src/train.py (Versi dengan Automatic Mixed Precision) + +import torch +import torch.nn as nn +import torch.optim as optim +from tqdm import tqdm +import numpy as np +from sklearn.metrics import classification_report + +# Impor dari file lain dalam proyek +from model import create_model +from dataset import get_dataloaders +from utils import save_model, save_plots, save_confusion_matrix + +# --- 1. KONFIGURASI & HYPERPARAMETERS --- +DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu' +DATA_DIR = '../data' +OUTPUT_DIR = '../outputs' +IMAGE_SIZE = 224 +BATCH_SIZE = 16 +EPOCHS = 50 +LEARNING_RATE_HEAD = 1e-3 +LEARNING_RATE_FINETUNE = 3e-5 +WEIGHT_DECAY = 0.01 +MODEL_NAME = 'best_vit_model_amp.pth' +NUM_CLASSES = 4 + +# --- 2. FUNGSI UNTUK MENGATASI CLASS IMBALANCE --- +def calculate_class_weights(dataset): + class_counts = np.bincount(dataset.targets) + class_counts = class_counts[class_counts > 0] # Hanya gunakan kelas yang ada + class_weights = 1. / np.sqrt(class_counts) + return torch.tensor(class_weights, dtype=torch.float).to(DEVICE) + +# --- 3. 
FUNGSI TRAINING & VALIDASI --- +def train_one_epoch(model, dataloader, optimizer, criterion, device, scaler): + model.train() + running_loss = 0.0 + correct_predictions = 0 + total_samples = 0 + + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Training"): + images, labels = images.to(device), labels.to(device) + optimizer.zero_grad() + + # Gunakan autocast HANYA jika di CUDA + if device == 'cuda': + with torch.cuda.amp.autocast(): + outputs = model(images) + loss = criterion(outputs, labels) + + scaler.scale(loss).backward() + scaler.step(optimizer) + scaler.update() + else: # Jika di CPU (atau AMD GPU), jalankan mode normal + outputs = model(images) + loss = criterion(outputs, labels) + loss.backward() + optimizer.step() + + running_loss += loss.item() * images.size(0) + _, preds = torch.max(outputs, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + return epoch_loss, epoch_acc + +def validate_one_epoch(model, dataloader, criterion, device): + model.eval() + running_loss = 0.0 + correct_predictions = 0 + total_samples = 0 + all_preds, all_labels = [], [] + + with torch.no_grad(): + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Validating"): + images, labels = images.to(device), labels.to(device) + outputs = model(images) + loss = criterion(outputs, labels) + + running_loss += loss.item() * images.size(0) + _, preds = torch.max(outputs, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + all_preds.extend(preds.cpu().numpy()) + all_labels.extend(labels.cpu().numpy()) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + + print("\n--- Laporan Klasifikasi Validasi ---") + print(classification_report(all_labels, all_preds, target_names=[str(i) for i in 
range(NUM_CLASSES)], zero_division=0)) + + return epoch_loss, epoch_acc, all_labels, all_preds + +# --- 4. SCRIPT UTAMA --- +if __name__ == '__main__': + train_loader, valid_loader, classes = get_dataloaders(DATA_DIR, BATCH_SIZE, IMAGE_SIZE) + + model = create_model(num_classes=NUM_CLASSES).to(DEVICE) + + class_weights = calculate_class_weights(train_loader.dataset) + print(f"Class weights untuk mengatasi imbalance: {class_weights}") + criterion = nn.CrossEntropyLoss(weight=class_weights) + + # Inisialisasi GradScaler HANYA jika di CUDA + scaler = torch.cuda.amp.GradScaler() if DEVICE == 'cuda' else None + if scaler: + print("Mixed Precision (AMP) diaktifkan.") + + # --- TAHAP 1: FREEZE BACKBONE, LATIH HEAD --- + print("\n--- TAHAP 1: Melatih Classifier Head ---") + for param in model.parameters(): + param.requires_grad = False + for param in model.head.parameters(): + param.requires_grad = True + + optimizer_head = optim.AdamW(model.head.parameters(), lr=LEARNING_RATE_HEAD, weight_decay=WEIGHT_DECAY) + + for epoch in range(5): + print(f"Epoch Head {epoch+1}/5") + # Saat training, oper scaler. Fungsi akan menanganinya. 
+ train_one_epoch(model, train_loader, optimizer_head, criterion, DEVICE, scaler) + validate_one_epoch(model, valid_loader, criterion, DEVICE) + + # --- TAHAP 2: UNFREEZE & FINE-TUNE SELURUH MODEL --- + print("\n--- TAHAP 2: Fine-tuning Seluruh Model ---") + for param in model.parameters(): + param.requires_grad = True + + optimizer_finetune = optim.AdamW(model.parameters(), lr=LEARNING_RATE_FINETUNE, weight_decay=WEIGHT_DECAY) + scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer_finetune, T_max=EPOCHS, eta_min=1e-7) + + history = {'train_loss': [], 'train_acc': [], 'valid_loss': [], 'valid_acc': []} + best_valid_acc = 0.0 + best_labels, best_preds = None, None + + for epoch in range(EPOCHS): + print(f"Epoch {epoch+1}/{EPOCHS}") + + train_loss, train_acc = train_one_epoch(model, train_loader, optimizer_finetune, criterion, DEVICE, scaler) + valid_loss, valid_acc, valid_labels, valid_preds = validate_one_epoch(model, valid_loader, criterion, DEVICE) + + scheduler.step() + + print(f" Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}") + print(f" Valid Loss: {valid_loss:.4f}, Valid Acc: {valid_acc:.4f}") + + history['train_loss'].append(train_loss) + history['train_acc'].append(train_acc) + history['valid_loss'].append(valid_loss) + history['valid_acc'].append(valid_acc) + + if valid_acc > best_valid_acc: + print(f"Validasi akurasi meningkat dari {best_valid_acc:.4f} ke {valid_acc:.4f}. Menyimpan model...") + save_model(epoch, model, optimizer_finetune, criterion, f"{OUTPUT_DIR}/{MODEL_NAME}") + best_valid_acc = valid_acc + best_labels = valid_labels + best_preds = valid_preds + + # --- F. 
EVALUASI & REPORTING --- + save_plots(history['train_acc'], history['valid_acc'], history['train_loss'], history['valid_loss'], OUTPUT_DIR) + + if best_labels and best_preds: + save_confusion_matrix(best_labels, best_preds, classes, f"{OUTPUT_DIR}/confusion_matrix.png") + + print("\n--- Selesai ---") + print(f"Model terbaik disimpan di {OUTPUT_DIR}/{MODEL_NAME}") \ No newline at end of file diff --git a/.history/src/train_20250926073851.py b/.history/src/train_20250926073851.py new file mode 100644 index 0000000000000000000000000000000000000000..c71fe6716682fd7493f07d8a313aad785393a3ad --- /dev/null +++ b/.history/src/train_20250926073851.py @@ -0,0 +1,171 @@ +# src/train.py (Versi dengan Automatic Mixed Precision) + +import torch +import torch.nn as nn +import torch.optim as optim +from tqdm import tqdm +import numpy as np +from sklearn.metrics import classification_report + +# Impor dari file lain dalam proyek +from model import create_model +from dataset import get_dataloaders +from utils import save_model, save_plots, save_confusion_matrix + +# --- 1. KONFIGURASI & HYPERPARAMETERS --- +DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu' +DATA_DIR = '../data' +OUTPUT_DIR = '../outputs/' +IMAGE_SIZE = 224 +BATCH_SIZE = 16 +EPOCHS = 50 +LEARNING_RATE_HEAD = 1e-3 +LEARNING_RATE_FINETUNE = 3e-5 +WEIGHT_DECAY = 0.01 +MODEL_NAME = 'best_vit_model_amp.pth' +NUM_CLASSES = 4 + +# --- 2. FUNGSI UNTUK MENGATASI CLASS IMBALANCE --- +def calculate_class_weights(dataset): + class_counts = np.bincount(dataset.targets) + class_counts = class_counts[class_counts > 0] # Hanya gunakan kelas yang ada + class_weights = 1. / np.sqrt(class_counts) + return torch.tensor(class_weights, dtype=torch.float).to(DEVICE) + +# --- 3. 
FUNGSI TRAINING & VALIDASI --- +def train_one_epoch(model, dataloader, optimizer, criterion, device, scaler): + model.train() + running_loss = 0.0 + correct_predictions = 0 + total_samples = 0 + + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Training"): + images, labels = images.to(device), labels.to(device) + optimizer.zero_grad() + + # Gunakan autocast HANYA jika di CUDA + if device == 'cuda': + with torch.cuda.amp.autocast(): + outputs = model(images) + loss = criterion(outputs, labels) + + scaler.scale(loss).backward() + scaler.step(optimizer) + scaler.update() + else: # Jika di CPU (atau AMD GPU), jalankan mode normal + outputs = model(images) + loss = criterion(outputs, labels) + loss.backward() + optimizer.step() + + running_loss += loss.item() * images.size(0) + _, preds = torch.max(outputs, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + return epoch_loss, epoch_acc + +def validate_one_epoch(model, dataloader, criterion, device): + model.eval() + running_loss = 0.0 + correct_predictions = 0 + total_samples = 0 + all_preds, all_labels = [], [] + + with torch.no_grad(): + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Validating"): + images, labels = images.to(device), labels.to(device) + outputs = model(images) + loss = criterion(outputs, labels) + + running_loss += loss.item() * images.size(0) + _, preds = torch.max(outputs, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + all_preds.extend(preds.cpu().numpy()) + all_labels.extend(labels.cpu().numpy()) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + + print("\n--- Laporan Klasifikasi Validasi ---") + print(classification_report(all_labels, all_preds, target_names=[str(i) for i in 
range(NUM_CLASSES)], zero_division=0)) + + return epoch_loss, epoch_acc, all_labels, all_preds + +# --- 4. SCRIPT UTAMA --- +if __name__ == '__main__': + train_loader, valid_loader, classes = get_dataloaders(DATA_DIR, BATCH_SIZE, IMAGE_SIZE) + + model = create_model(num_classes=NUM_CLASSES).to(DEVICE) + + class_weights = calculate_class_weights(train_loader.dataset) + print(f"Class weights untuk mengatasi imbalance: {class_weights}") + criterion = nn.CrossEntropyLoss(weight=class_weights) + + # Inisialisasi GradScaler HANYA jika di CUDA + scaler = torch.cuda.amp.GradScaler() if DEVICE == 'cuda' else None + if scaler: + print("Mixed Precision (AMP) diaktifkan.") + + # --- TAHAP 1: FREEZE BACKBONE, LATIH HEAD --- + print("\n--- TAHAP 1: Melatih Classifier Head ---") + for param in model.parameters(): + param.requires_grad = False + for param in model.head.parameters(): + param.requires_grad = True + + optimizer_head = optim.AdamW(model.head.parameters(), lr=LEARNING_RATE_HEAD, weight_decay=WEIGHT_DECAY) + + for epoch in range(5): + print(f"Epoch Head {epoch+1}/5") + # Saat training, oper scaler. Fungsi akan menanganinya. 
+ train_one_epoch(model, train_loader, optimizer_head, criterion, DEVICE, scaler) + validate_one_epoch(model, valid_loader, criterion, DEVICE) + + # --- TAHAP 2: UNFREEZE & FINE-TUNE SELURUH MODEL --- + print("\n--- TAHAP 2: Fine-tuning Seluruh Model ---") + for param in model.parameters(): + param.requires_grad = True + + optimizer_finetune = optim.AdamW(model.parameters(), lr=LEARNING_RATE_FINETUNE, weight_decay=WEIGHT_DECAY) + scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer_finetune, T_max=EPOCHS, eta_min=1e-7) + + history = {'train_loss': [], 'train_acc': [], 'valid_loss': [], 'valid_acc': []} + best_valid_acc = 0.0 + best_labels, best_preds = None, None + + for epoch in range(EPOCHS): + print(f"Epoch {epoch+1}/{EPOCHS}") + + train_loss, train_acc = train_one_epoch(model, train_loader, optimizer_finetune, criterion, DEVICE, scaler) + valid_loss, valid_acc, valid_labels, valid_preds = validate_one_epoch(model, valid_loader, criterion, DEVICE) + + scheduler.step() + + print(f" Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}") + print(f" Valid Loss: {valid_loss:.4f}, Valid Acc: {valid_acc:.4f}") + + history['train_loss'].append(train_loss) + history['train_acc'].append(train_acc) + history['valid_loss'].append(valid_loss) + history['valid_acc'].append(valid_acc) + + if valid_acc > best_valid_acc: + print(f"Validasi akurasi meningkat dari {best_valid_acc:.4f} ke {valid_acc:.4f}. Menyimpan model...") + save_model(epoch, model, optimizer_finetune, criterion, f"{OUTPUT_DIR}/{MODEL_NAME}") + best_valid_acc = valid_acc + best_labels = valid_labels + best_preds = valid_preds + + # --- F. 
EVALUASI & REPORTING --- + save_plots(history['train_acc'], history['valid_acc'], history['train_loss'], history['valid_loss'], OUTPUT_DIR) + + if best_labels and best_preds: + save_confusion_matrix(best_labels, best_preds, classes, f"{OUTPUT_DIR}/confusion_matrix.png") + + print("\n--- Selesai ---") + print(f"Model terbaik disimpan di {OUTPUT_DIR}/{MODEL_NAME}") \ No newline at end of file diff --git a/.history/src/train_20250926073853.py b/.history/src/train_20250926073853.py new file mode 100644 index 0000000000000000000000000000000000000000..98ea18455f280425cb2ac9b9b021ebd4b1442f81 --- /dev/null +++ b/.history/src/train_20250926073853.py @@ -0,0 +1,171 @@ +# src/train.py (Versi dengan Automatic Mixed Precision) + +import torch +import torch.nn as nn +import torch.optim as optim +from tqdm import tqdm +import numpy as np +from sklearn.metrics import classification_report + +# Impor dari file lain dalam proyek +from model import create_model +from dataset import get_dataloaders +from utils import save_model, save_plots, save_confusion_matrix + +# --- 1. KONFIGURASI & HYPERPARAMETERS --- +DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu' +DATA_DIR = '../data' +OUTPUT_DIR = '../outputs/,' +IMAGE_SIZE = 224 +BATCH_SIZE = 16 +EPOCHS = 50 +LEARNING_RATE_HEAD = 1e-3 +LEARNING_RATE_FINETUNE = 3e-5 +WEIGHT_DECAY = 0.01 +MODEL_NAME = 'best_vit_model_amp.pth' +NUM_CLASSES = 4 + +# --- 2. FUNGSI UNTUK MENGATASI CLASS IMBALANCE --- +def calculate_class_weights(dataset): + class_counts = np.bincount(dataset.targets) + class_counts = class_counts[class_counts > 0] # Hanya gunakan kelas yang ada + class_weights = 1. / np.sqrt(class_counts) + return torch.tensor(class_weights, dtype=torch.float).to(DEVICE) + +# --- 3. 
FUNGSI TRAINING & VALIDASI --- +def train_one_epoch(model, dataloader, optimizer, criterion, device, scaler): + model.train() + running_loss = 0.0 + correct_predictions = 0 + total_samples = 0 + + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Training"): + images, labels = images.to(device), labels.to(device) + optimizer.zero_grad() + + # Gunakan autocast HANYA jika di CUDA + if device == 'cuda': + with torch.cuda.amp.autocast(): + outputs = model(images) + loss = criterion(outputs, labels) + + scaler.scale(loss).backward() + scaler.step(optimizer) + scaler.update() + else: # Jika di CPU (atau AMD GPU), jalankan mode normal + outputs = model(images) + loss = criterion(outputs, labels) + loss.backward() + optimizer.step() + + running_loss += loss.item() * images.size(0) + _, preds = torch.max(outputs, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + return epoch_loss, epoch_acc + +def validate_one_epoch(model, dataloader, criterion, device): + model.eval() + running_loss = 0.0 + correct_predictions = 0 + total_samples = 0 + all_preds, all_labels = [], [] + + with torch.no_grad(): + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Validating"): + images, labels = images.to(device), labels.to(device) + outputs = model(images) + loss = criterion(outputs, labels) + + running_loss += loss.item() * images.size(0) + _, preds = torch.max(outputs, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + all_preds.extend(preds.cpu().numpy()) + all_labels.extend(labels.cpu().numpy()) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + + print("\n--- Laporan Klasifikasi Validasi ---") + print(classification_report(all_labels, all_preds, target_names=[str(i) for i in 
range(NUM_CLASSES)], zero_division=0)) + + return epoch_loss, epoch_acc, all_labels, all_preds + +# --- 4. SCRIPT UTAMA --- +if __name__ == '__main__': + train_loader, valid_loader, classes = get_dataloaders(DATA_DIR, BATCH_SIZE, IMAGE_SIZE) + + model = create_model(num_classes=NUM_CLASSES).to(DEVICE) + + class_weights = calculate_class_weights(train_loader.dataset) + print(f"Class weights untuk mengatasi imbalance: {class_weights}") + criterion = nn.CrossEntropyLoss(weight=class_weights) + + # Inisialisasi GradScaler HANYA jika di CUDA + scaler = torch.cuda.amp.GradScaler() if DEVICE == 'cuda' else None + if scaler: + print("Mixed Precision (AMP) diaktifkan.") + + # --- TAHAP 1: FREEZE BACKBONE, LATIH HEAD --- + print("\n--- TAHAP 1: Melatih Classifier Head ---") + for param in model.parameters(): + param.requires_grad = False + for param in model.head.parameters(): + param.requires_grad = True + + optimizer_head = optim.AdamW(model.head.parameters(), lr=LEARNING_RATE_HEAD, weight_decay=WEIGHT_DECAY) + + for epoch in range(5): + print(f"Epoch Head {epoch+1}/5") + # Saat training, oper scaler. Fungsi akan menanganinya. 
+ train_one_epoch(model, train_loader, optimizer_head, criterion, DEVICE, scaler) + validate_one_epoch(model, valid_loader, criterion, DEVICE) + + # --- TAHAP 2: UNFREEZE & FINE-TUNE SELURUH MODEL --- + print("\n--- TAHAP 2: Fine-tuning Seluruh Model ---") + for param in model.parameters(): + param.requires_grad = True + + optimizer_finetune = optim.AdamW(model.parameters(), lr=LEARNING_RATE_FINETUNE, weight_decay=WEIGHT_DECAY) + scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer_finetune, T_max=EPOCHS, eta_min=1e-7) + + history = {'train_loss': [], 'train_acc': [], 'valid_loss': [], 'valid_acc': []} + best_valid_acc = 0.0 + best_labels, best_preds = None, None + + for epoch in range(EPOCHS): + print(f"Epoch {epoch+1}/{EPOCHS}") + + train_loss, train_acc = train_one_epoch(model, train_loader, optimizer_finetune, criterion, DEVICE, scaler) + valid_loss, valid_acc, valid_labels, valid_preds = validate_one_epoch(model, valid_loader, criterion, DEVICE) + + scheduler.step() + + print(f" Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}") + print(f" Valid Loss: {valid_loss:.4f}, Valid Acc: {valid_acc:.4f}") + + history['train_loss'].append(train_loss) + history['train_acc'].append(train_acc) + history['valid_loss'].append(valid_loss) + history['valid_acc'].append(valid_acc) + + if valid_acc > best_valid_acc: + print(f"Validasi akurasi meningkat dari {best_valid_acc:.4f} ke {valid_acc:.4f}. Menyimpan model...") + save_model(epoch, model, optimizer_finetune, criterion, f"{OUTPUT_DIR}/{MODEL_NAME}") + best_valid_acc = valid_acc + best_labels = valid_labels + best_preds = valid_preds + + # --- F. 
EVALUASI & REPORTING --- + save_plots(history['train_acc'], history['valid_acc'], history['train_loss'], history['valid_loss'], OUTPUT_DIR) + + if best_labels and best_preds: + save_confusion_matrix(best_labels, best_preds, classes, f"{OUTPUT_DIR}/confusion_matrix.png") + + print("\n--- Selesai ---") + print(f"Model terbaik disimpan di {OUTPUT_DIR}/{MODEL_NAME}") \ No newline at end of file diff --git a/.history/src/train_20250926073854.py b/.history/src/train_20250926073854.py new file mode 100644 index 0000000000000000000000000000000000000000..c71fe6716682fd7493f07d8a313aad785393a3ad --- /dev/null +++ b/.history/src/train_20250926073854.py @@ -0,0 +1,171 @@ +# src/train.py (Versi dengan Automatic Mixed Precision) + +import torch +import torch.nn as nn +import torch.optim as optim +from tqdm import tqdm +import numpy as np +from sklearn.metrics import classification_report + +# Impor dari file lain dalam proyek +from model import create_model +from dataset import get_dataloaders +from utils import save_model, save_plots, save_confusion_matrix + +# --- 1. KONFIGURASI & HYPERPARAMETERS --- +DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu' +DATA_DIR = '../data' +OUTPUT_DIR = '../outputs/' +IMAGE_SIZE = 224 +BATCH_SIZE = 16 +EPOCHS = 50 +LEARNING_RATE_HEAD = 1e-3 +LEARNING_RATE_FINETUNE = 3e-5 +WEIGHT_DECAY = 0.01 +MODEL_NAME = 'best_vit_model_amp.pth' +NUM_CLASSES = 4 + +# --- 2. FUNGSI UNTUK MENGATASI CLASS IMBALANCE --- +def calculate_class_weights(dataset): + class_counts = np.bincount(dataset.targets) + class_counts = class_counts[class_counts > 0] # Hanya gunakan kelas yang ada + class_weights = 1. / np.sqrt(class_counts) + return torch.tensor(class_weights, dtype=torch.float).to(DEVICE) + +# --- 3. 
FUNGSI TRAINING & VALIDASI --- +def train_one_epoch(model, dataloader, optimizer, criterion, device, scaler): + model.train() + running_loss = 0.0 + correct_predictions = 0 + total_samples = 0 + + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Training"): + images, labels = images.to(device), labels.to(device) + optimizer.zero_grad() + + # Gunakan autocast HANYA jika di CUDA + if device == 'cuda': + with torch.cuda.amp.autocast(): + outputs = model(images) + loss = criterion(outputs, labels) + + scaler.scale(loss).backward() + scaler.step(optimizer) + scaler.update() + else: # Jika di CPU (atau AMD GPU), jalankan mode normal + outputs = model(images) + loss = criterion(outputs, labels) + loss.backward() + optimizer.step() + + running_loss += loss.item() * images.size(0) + _, preds = torch.max(outputs, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + return epoch_loss, epoch_acc + +def validate_one_epoch(model, dataloader, criterion, device): + model.eval() + running_loss = 0.0 + correct_predictions = 0 + total_samples = 0 + all_preds, all_labels = [], [] + + with torch.no_grad(): + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Validating"): + images, labels = images.to(device), labels.to(device) + outputs = model(images) + loss = criterion(outputs, labels) + + running_loss += loss.item() * images.size(0) + _, preds = torch.max(outputs, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + all_preds.extend(preds.cpu().numpy()) + all_labels.extend(labels.cpu().numpy()) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + + print("\n--- Laporan Klasifikasi Validasi ---") + print(classification_report(all_labels, all_preds, target_names=[str(i) for i in 
range(NUM_CLASSES)], zero_division=0)) + + return epoch_loss, epoch_acc, all_labels, all_preds + +# --- 4. SCRIPT UTAMA --- +if __name__ == '__main__': + train_loader, valid_loader, classes = get_dataloaders(DATA_DIR, BATCH_SIZE, IMAGE_SIZE) + + model = create_model(num_classes=NUM_CLASSES).to(DEVICE) + + class_weights = calculate_class_weights(train_loader.dataset) + print(f"Class weights untuk mengatasi imbalance: {class_weights}") + criterion = nn.CrossEntropyLoss(weight=class_weights) + + # Inisialisasi GradScaler HANYA jika di CUDA + scaler = torch.cuda.amp.GradScaler() if DEVICE == 'cuda' else None + if scaler: + print("Mixed Precision (AMP) diaktifkan.") + + # --- TAHAP 1: FREEZE BACKBONE, LATIH HEAD --- + print("\n--- TAHAP 1: Melatih Classifier Head ---") + for param in model.parameters(): + param.requires_grad = False + for param in model.head.parameters(): + param.requires_grad = True + + optimizer_head = optim.AdamW(model.head.parameters(), lr=LEARNING_RATE_HEAD, weight_decay=WEIGHT_DECAY) + + for epoch in range(5): + print(f"Epoch Head {epoch+1}/5") + # Saat training, oper scaler. Fungsi akan menanganinya. 
+ train_one_epoch(model, train_loader, optimizer_head, criterion, DEVICE, scaler) + validate_one_epoch(model, valid_loader, criterion, DEVICE) + + # --- TAHAP 2: UNFREEZE & FINE-TUNE SELURUH MODEL --- + print("\n--- TAHAP 2: Fine-tuning Seluruh Model ---") + for param in model.parameters(): + param.requires_grad = True + + optimizer_finetune = optim.AdamW(model.parameters(), lr=LEARNING_RATE_FINETUNE, weight_decay=WEIGHT_DECAY) + scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer_finetune, T_max=EPOCHS, eta_min=1e-7) + + history = {'train_loss': [], 'train_acc': [], 'valid_loss': [], 'valid_acc': []} + best_valid_acc = 0.0 + best_labels, best_preds = None, None + + for epoch in range(EPOCHS): + print(f"Epoch {epoch+1}/{EPOCHS}") + + train_loss, train_acc = train_one_epoch(model, train_loader, optimizer_finetune, criterion, DEVICE, scaler) + valid_loss, valid_acc, valid_labels, valid_preds = validate_one_epoch(model, valid_loader, criterion, DEVICE) + + scheduler.step() + + print(f" Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}") + print(f" Valid Loss: {valid_loss:.4f}, Valid Acc: {valid_acc:.4f}") + + history['train_loss'].append(train_loss) + history['train_acc'].append(train_acc) + history['valid_loss'].append(valid_loss) + history['valid_acc'].append(valid_acc) + + if valid_acc > best_valid_acc: + print(f"Validasi akurasi meningkat dari {best_valid_acc:.4f} ke {valid_acc:.4f}. Menyimpan model...") + save_model(epoch, model, optimizer_finetune, criterion, f"{OUTPUT_DIR}/{MODEL_NAME}") + best_valid_acc = valid_acc + best_labels = valid_labels + best_preds = valid_preds + + # --- F. 
EVALUASI & REPORTING --- + save_plots(history['train_acc'], history['valid_acc'], history['train_loss'], history['valid_loss'], OUTPUT_DIR) + + if best_labels and best_preds: + save_confusion_matrix(best_labels, best_preds, classes, f"{OUTPUT_DIR}/confusion_matrix.png") + + print("\n--- Selesai ---") + print(f"Model terbaik disimpan di {OUTPUT_DIR}/{MODEL_NAME}") \ No newline at end of file diff --git a/.history/src/train_20250926073916.py b/.history/src/train_20250926073916.py new file mode 100644 index 0000000000000000000000000000000000000000..389048ab54a5cbc33d81b4e9f5351440da461eb2 --- /dev/null +++ b/.history/src/train_20250926073916.py @@ -0,0 +1,171 @@ +# src/train.py (Versi dengan Automatic Mixed Precision) + +import torch +import torch.nn as nn +import torch.optim as optim +from tqdm import tqdm +import numpy as np +from sklearn.metrics import classification_report + +# Impor dari file lain dalam proyek +from model import create_model +from dataset import get_dataloaders +from utils import save_model, save_plots, save_confusion_matrix + +# --- 1. KONFIGURASI & HYPERPARAMETERS --- +DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu' +DATA_DIR = '../data' +OUTPUT_DIR = '../outputs/models' +IMAGE_SIZE = 224 +BATCH_SIZE = 16 +EPOCHS = 50 +LEARNING_RATE_HEAD = 1e-3 +LEARNING_RATE_FINETUNE = 3e-5 +WEIGHT_DECAY = 0.01 +MODEL_NAME = 'best_vit_model_amp.pth' +NUM_CLASSES = 4 + +# --- 2. FUNGSI UNTUK MENGATASI CLASS IMBALANCE --- +def calculate_class_weights(dataset): + class_counts = np.bincount(dataset.targets) + class_counts = class_counts[class_counts > 0] # Hanya gunakan kelas yang ada + class_weights = 1. / np.sqrt(class_counts) + return torch.tensor(class_weights, dtype=torch.float).to(DEVICE) + +# --- 3. 
FUNGSI TRAINING & VALIDASI --- +def train_one_epoch(model, dataloader, optimizer, criterion, device, scaler): + model.train() + running_loss = 0.0 + correct_predictions = 0 + total_samples = 0 + + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Training"): + images, labels = images.to(device), labels.to(device) + optimizer.zero_grad() + + # Gunakan autocast HANYA jika di CUDA + if device == 'cuda': + with torch.cuda.amp.autocast(): + outputs = model(images) + loss = criterion(outputs, labels) + + scaler.scale(loss).backward() + scaler.step(optimizer) + scaler.update() + else: # Jika di CPU (atau AMD GPU), jalankan mode normal + outputs = model(images) + loss = criterion(outputs, labels) + loss.backward() + optimizer.step() + + running_loss += loss.item() * images.size(0) + _, preds = torch.max(outputs, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + return epoch_loss, epoch_acc + +def validate_one_epoch(model, dataloader, criterion, device): + model.eval() + running_loss = 0.0 + correct_predictions = 0 + total_samples = 0 + all_preds, all_labels = [], [] + + with torch.no_grad(): + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Validating"): + images, labels = images.to(device), labels.to(device) + outputs = model(images) + loss = criterion(outputs, labels) + + running_loss += loss.item() * images.size(0) + _, preds = torch.max(outputs, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + all_preds.extend(preds.cpu().numpy()) + all_labels.extend(labels.cpu().numpy()) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + + print("\n--- Laporan Klasifikasi Validasi ---") + print(classification_report(all_labels, all_preds, target_names=[str(i) for i in 
range(NUM_CLASSES)], zero_division=0)) + + return epoch_loss, epoch_acc, all_labels, all_preds + +# --- 4. SCRIPT UTAMA --- +if __name__ == '__main__': + train_loader, valid_loader, classes = get_dataloaders(DATA_DIR, BATCH_SIZE, IMAGE_SIZE) + + model = create_model(num_classes=NUM_CLASSES).to(DEVICE) + + class_weights = calculate_class_weights(train_loader.dataset) + print(f"Class weights untuk mengatasi imbalance: {class_weights}") + criterion = nn.CrossEntropyLoss(weight=class_weights) + + # Inisialisasi GradScaler HANYA jika di CUDA + scaler = torch.cuda.amp.GradScaler() if DEVICE == 'cuda' else None + if scaler: + print("Mixed Precision (AMP) diaktifkan.") + + # --- TAHAP 1: FREEZE BACKBONE, LATIH HEAD --- + print("\n--- TAHAP 1: Melatih Classifier Head ---") + for param in model.parameters(): + param.requires_grad = False + for param in model.head.parameters(): + param.requires_grad = True + + optimizer_head = optim.AdamW(model.head.parameters(), lr=LEARNING_RATE_HEAD, weight_decay=WEIGHT_DECAY) + + for epoch in range(5): + print(f"Epoch Head {epoch+1}/5") + # Saat training, oper scaler. Fungsi akan menanganinya. 
+ train_one_epoch(model, train_loader, optimizer_head, criterion, DEVICE, scaler) + validate_one_epoch(model, valid_loader, criterion, DEVICE) + + # --- TAHAP 2: UNFREEZE & FINE-TUNE SELURUH MODEL --- + print("\n--- TAHAP 2: Fine-tuning Seluruh Model ---") + for param in model.parameters(): + param.requires_grad = True + + optimizer_finetune = optim.AdamW(model.parameters(), lr=LEARNING_RATE_FINETUNE, weight_decay=WEIGHT_DECAY) + scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer_finetune, T_max=EPOCHS, eta_min=1e-7) + + history = {'train_loss': [], 'train_acc': [], 'valid_loss': [], 'valid_acc': []} + best_valid_acc = 0.0 + best_labels, best_preds = None, None + + for epoch in range(EPOCHS): + print(f"Epoch {epoch+1}/{EPOCHS}") + + train_loss, train_acc = train_one_epoch(model, train_loader, optimizer_finetune, criterion, DEVICE, scaler) + valid_loss, valid_acc, valid_labels, valid_preds = validate_one_epoch(model, valid_loader, criterion, DEVICE) + + scheduler.step() + + print(f" Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}") + print(f" Valid Loss: {valid_loss:.4f}, Valid Acc: {valid_acc:.4f}") + + history['train_loss'].append(train_loss) + history['train_acc'].append(train_acc) + history['valid_loss'].append(valid_loss) + history['valid_acc'].append(valid_acc) + + if valid_acc > best_valid_acc: + print(f"Validasi akurasi meningkat dari {best_valid_acc:.4f} ke {valid_acc:.4f}. Menyimpan model...") + save_model(epoch, model, optimizer_finetune, criterion, f"{OUTPUT_DIR}/{MODEL_NAME}") + best_valid_acc = valid_acc + best_labels = valid_labels + best_preds = valid_preds + + # --- F. 
EVALUASI & REPORTING --- + save_plots(history['train_acc'], history['valid_acc'], history['train_loss'], history['valid_loss'], OUTPUT_DIR) + + if best_labels and best_preds: + save_confusion_matrix(best_labels, best_preds, classes, f"{OUTPUT_DIR}/confusion_matrix.png") + + print("\n--- Selesai ---") + print(f"Model terbaik disimpan di {OUTPUT_DIR}/{MODEL_NAME}") \ No newline at end of file diff --git a/.history/src/train_20250926074159.py b/.history/src/train_20250926074159.py new file mode 100644 index 0000000000000000000000000000000000000000..d094dd68b3c9324c52a81e168af10aa9138c5b7c --- /dev/null +++ b/.history/src/train_20250926074159.py @@ -0,0 +1,171 @@ +# src/train.py (Versi dengan Automatic Mixed Precision) + +import torch +import torch.nn as nn +import torch.optim as optim +from tqdm import tqdm +import numpy as np +from sklearn.metrics import classification_report + +# Impor dari file lain dalam proyek +from model import create_model +from dataset import get_dataloaders +from utils import save_model, save_plots, save_confusion_matrix + +# --- 1. KONFIGURASI & HYPERPARAMETERS --- +DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu' +DATA_DIR = '/data' +OUTPUT_DIR = '../outputs/models' +IMAGE_SIZE = 224 +BATCH_SIZE = 16 +EPOCHS = 50 +LEARNING_RATE_HEAD = 1e-3 +LEARNING_RATE_FINETUNE = 3e-5 +WEIGHT_DECAY = 0.01 +MODEL_NAME = 'best_vit_model_amp.pth' +NUM_CLASSES = 4 + +# --- 2. FUNGSI UNTUK MENGATASI CLASS IMBALANCE --- +def calculate_class_weights(dataset): + class_counts = np.bincount(dataset.targets) + class_counts = class_counts[class_counts > 0] # Hanya gunakan kelas yang ada + class_weights = 1. / np.sqrt(class_counts) + return torch.tensor(class_weights, dtype=torch.float).to(DEVICE) + +# --- 3. 
FUNGSI TRAINING & VALIDASI --- +def train_one_epoch(model, dataloader, optimizer, criterion, device, scaler): + model.train() + running_loss = 0.0 + correct_predictions = 0 + total_samples = 0 + + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Training"): + images, labels = images.to(device), labels.to(device) + optimizer.zero_grad() + + # Gunakan autocast HANYA jika di CUDA + if device == 'cuda': + with torch.cuda.amp.autocast(): + outputs = model(images) + loss = criterion(outputs, labels) + + scaler.scale(loss).backward() + scaler.step(optimizer) + scaler.update() + else: # Jika di CPU (atau AMD GPU), jalankan mode normal + outputs = model(images) + loss = criterion(outputs, labels) + loss.backward() + optimizer.step() + + running_loss += loss.item() * images.size(0) + _, preds = torch.max(outputs, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + return epoch_loss, epoch_acc + +def validate_one_epoch(model, dataloader, criterion, device): + model.eval() + running_loss = 0.0 + correct_predictions = 0 + total_samples = 0 + all_preds, all_labels = [], [] + + with torch.no_grad(): + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Validating"): + images, labels = images.to(device), labels.to(device) + outputs = model(images) + loss = criterion(outputs, labels) + + running_loss += loss.item() * images.size(0) + _, preds = torch.max(outputs, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + all_preds.extend(preds.cpu().numpy()) + all_labels.extend(labels.cpu().numpy()) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + + print("\n--- Laporan Klasifikasi Validasi ---") + print(classification_report(all_labels, all_preds, target_names=[str(i) for i in 
range(NUM_CLASSES)], zero_division=0)) + + return epoch_loss, epoch_acc, all_labels, all_preds + +# --- 4. SCRIPT UTAMA --- +if __name__ == '__main__': + train_loader, valid_loader, classes = get_dataloaders(DATA_DIR, BATCH_SIZE, IMAGE_SIZE) + + model = create_model(num_classes=NUM_CLASSES).to(DEVICE) + + class_weights = calculate_class_weights(train_loader.dataset) + print(f"Class weights untuk mengatasi imbalance: {class_weights}") + criterion = nn.CrossEntropyLoss(weight=class_weights) + + # Inisialisasi GradScaler HANYA jika di CUDA + scaler = torch.cuda.amp.GradScaler() if DEVICE == 'cuda' else None + if scaler: + print("Mixed Precision (AMP) diaktifkan.") + + # --- TAHAP 1: FREEZE BACKBONE, LATIH HEAD --- + print("\n--- TAHAP 1: Melatih Classifier Head ---") + for param in model.parameters(): + param.requires_grad = False + for param in model.head.parameters(): + param.requires_grad = True + + optimizer_head = optim.AdamW(model.head.parameters(), lr=LEARNING_RATE_HEAD, weight_decay=WEIGHT_DECAY) + + for epoch in range(5): + print(f"Epoch Head {epoch+1}/5") + # Saat training, oper scaler. Fungsi akan menanganinya. 
+ train_one_epoch(model, train_loader, optimizer_head, criterion, DEVICE, scaler) + validate_one_epoch(model, valid_loader, criterion, DEVICE) + + # --- TAHAP 2: UNFREEZE & FINE-TUNE SELURUH MODEL --- + print("\n--- TAHAP 2: Fine-tuning Seluruh Model ---") + for param in model.parameters(): + param.requires_grad = True + + optimizer_finetune = optim.AdamW(model.parameters(), lr=LEARNING_RATE_FINETUNE, weight_decay=WEIGHT_DECAY) + scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer_finetune, T_max=EPOCHS, eta_min=1e-7) + + history = {'train_loss': [], 'train_acc': [], 'valid_loss': [], 'valid_acc': []} + best_valid_acc = 0.0 + best_labels, best_preds = None, None + + for epoch in range(EPOCHS): + print(f"Epoch {epoch+1}/{EPOCHS}") + + train_loss, train_acc = train_one_epoch(model, train_loader, optimizer_finetune, criterion, DEVICE, scaler) + valid_loss, valid_acc, valid_labels, valid_preds = validate_one_epoch(model, valid_loader, criterion, DEVICE) + + scheduler.step() + + print(f" Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}") + print(f" Valid Loss: {valid_loss:.4f}, Valid Acc: {valid_acc:.4f}") + + history['train_loss'].append(train_loss) + history['train_acc'].append(train_acc) + history['valid_loss'].append(valid_loss) + history['valid_acc'].append(valid_acc) + + if valid_acc > best_valid_acc: + print(f"Validasi akurasi meningkat dari {best_valid_acc:.4f} ke {valid_acc:.4f}. Menyimpan model...") + save_model(epoch, model, optimizer_finetune, criterion, f"{OUTPUT_DIR}/{MODEL_NAME}") + best_valid_acc = valid_acc + best_labels = valid_labels + best_preds = valid_preds + + # --- F. 
EVALUASI & REPORTING --- + save_plots(history['train_acc'], history['valid_acc'], history['train_loss'], history['valid_loss'], OUTPUT_DIR) + + if best_labels and best_preds: + save_confusion_matrix(best_labels, best_preds, classes, f"{OUTPUT_DIR}/confusion_matrix.png") + + print("\n--- Selesai ---") + print(f"Model terbaik disimpan di {OUTPUT_DIR}/{MODEL_NAME}") \ No newline at end of file diff --git a/.history/src/train_20250926074202.py b/.history/src/train_20250926074202.py new file mode 100644 index 0000000000000000000000000000000000000000..f26cf2d7139fec995fd801b53f08c0f919939eb4 --- /dev/null +++ b/.history/src/train_20250926074202.py @@ -0,0 +1,171 @@ +# src/train.py (Versi dengan Automatic Mixed Precision) + +import torch +import torch.nn as nn +import torch.optim as optim +from tqdm import tqdm +import numpy as np +from sklearn.metrics import classification_report + +# Impor dari file lain dalam proyek +from model import create_model +from dataset import get_dataloaders +from utils import save_model, save_plots, save_confusion_matrix + +# --- 1. KONFIGURASI & HYPERPARAMETERS --- +DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu' +DATA_DIR = '/data' +OUTPUT_DIR = '/outputs/models' +IMAGE_SIZE = 224 +BATCH_SIZE = 16 +EPOCHS = 50 +LEARNING_RATE_HEAD = 1e-3 +LEARNING_RATE_FINETUNE = 3e-5 +WEIGHT_DECAY = 0.01 +MODEL_NAME = 'best_vit_model_amp.pth' +NUM_CLASSES = 4 + +# --- 2. FUNGSI UNTUK MENGATASI CLASS IMBALANCE --- +def calculate_class_weights(dataset): + class_counts = np.bincount(dataset.targets) + class_counts = class_counts[class_counts > 0] # Hanya gunakan kelas yang ada + class_weights = 1. / np.sqrt(class_counts) + return torch.tensor(class_weights, dtype=torch.float).to(DEVICE) + +# --- 3. 
FUNGSI TRAINING & VALIDASI --- +def train_one_epoch(model, dataloader, optimizer, criterion, device, scaler): + model.train() + running_loss = 0.0 + correct_predictions = 0 + total_samples = 0 + + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Training"): + images, labels = images.to(device), labels.to(device) + optimizer.zero_grad() + + # Gunakan autocast HANYA jika di CUDA + if device == 'cuda': + with torch.cuda.amp.autocast(): + outputs = model(images) + loss = criterion(outputs, labels) + + scaler.scale(loss).backward() + scaler.step(optimizer) + scaler.update() + else: # Jika di CPU (atau AMD GPU), jalankan mode normal + outputs = model(images) + loss = criterion(outputs, labels) + loss.backward() + optimizer.step() + + running_loss += loss.item() * images.size(0) + _, preds = torch.max(outputs, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + return epoch_loss, epoch_acc + +def validate_one_epoch(model, dataloader, criterion, device): + model.eval() + running_loss = 0.0 + correct_predictions = 0 + total_samples = 0 + all_preds, all_labels = [], [] + + with torch.no_grad(): + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Validating"): + images, labels = images.to(device), labels.to(device) + outputs = model(images) + loss = criterion(outputs, labels) + + running_loss += loss.item() * images.size(0) + _, preds = torch.max(outputs, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + all_preds.extend(preds.cpu().numpy()) + all_labels.extend(labels.cpu().numpy()) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + + print("\n--- Laporan Klasifikasi Validasi ---") + print(classification_report(all_labels, all_preds, target_names=[str(i) for i in 
range(NUM_CLASSES)], zero_division=0)) + + return epoch_loss, epoch_acc, all_labels, all_preds + +# --- 4. SCRIPT UTAMA --- +if __name__ == '__main__': + train_loader, valid_loader, classes = get_dataloaders(DATA_DIR, BATCH_SIZE, IMAGE_SIZE) + + model = create_model(num_classes=NUM_CLASSES).to(DEVICE) + + class_weights = calculate_class_weights(train_loader.dataset) + print(f"Class weights untuk mengatasi imbalance: {class_weights}") + criterion = nn.CrossEntropyLoss(weight=class_weights) + + # Inisialisasi GradScaler HANYA jika di CUDA + scaler = torch.cuda.amp.GradScaler() if DEVICE == 'cuda' else None + if scaler: + print("Mixed Precision (AMP) diaktifkan.") + + # --- TAHAP 1: FREEZE BACKBONE, LATIH HEAD --- + print("\n--- TAHAP 1: Melatih Classifier Head ---") + for param in model.parameters(): + param.requires_grad = False + for param in model.head.parameters(): + param.requires_grad = True + + optimizer_head = optim.AdamW(model.head.parameters(), lr=LEARNING_RATE_HEAD, weight_decay=WEIGHT_DECAY) + + for epoch in range(5): + print(f"Epoch Head {epoch+1}/5") + # Saat training, oper scaler. Fungsi akan menanganinya. 
+ train_one_epoch(model, train_loader, optimizer_head, criterion, DEVICE, scaler) + validate_one_epoch(model, valid_loader, criterion, DEVICE) + + # --- TAHAP 2: UNFREEZE & FINE-TUNE SELURUH MODEL --- + print("\n--- TAHAP 2: Fine-tuning Seluruh Model ---") + for param in model.parameters(): + param.requires_grad = True + + optimizer_finetune = optim.AdamW(model.parameters(), lr=LEARNING_RATE_FINETUNE, weight_decay=WEIGHT_DECAY) + scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer_finetune, T_max=EPOCHS, eta_min=1e-7) + + history = {'train_loss': [], 'train_acc': [], 'valid_loss': [], 'valid_acc': []} + best_valid_acc = 0.0 + best_labels, best_preds = None, None + + for epoch in range(EPOCHS): + print(f"Epoch {epoch+1}/{EPOCHS}") + + train_loss, train_acc = train_one_epoch(model, train_loader, optimizer_finetune, criterion, DEVICE, scaler) + valid_loss, valid_acc, valid_labels, valid_preds = validate_one_epoch(model, valid_loader, criterion, DEVICE) + + scheduler.step() + + print(f" Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}") + print(f" Valid Loss: {valid_loss:.4f}, Valid Acc: {valid_acc:.4f}") + + history['train_loss'].append(train_loss) + history['train_acc'].append(train_acc) + history['valid_loss'].append(valid_loss) + history['valid_acc'].append(valid_acc) + + if valid_acc > best_valid_acc: + print(f"Validasi akurasi meningkat dari {best_valid_acc:.4f} ke {valid_acc:.4f}. Menyimpan model...") + save_model(epoch, model, optimizer_finetune, criterion, f"{OUTPUT_DIR}/{MODEL_NAME}") + best_valid_acc = valid_acc + best_labels = valid_labels + best_preds = valid_preds + + # --- F. 
EVALUASI & REPORTING --- + save_plots(history['train_acc'], history['valid_acc'], history['train_loss'], history['valid_loss'], OUTPUT_DIR) + + if best_labels and best_preds: + save_confusion_matrix(best_labels, best_preds, classes, f"{OUTPUT_DIR}/confusion_matrix.png") + + print("\n--- Selesai ---") + print(f"Model terbaik disimpan di {OUTPUT_DIR}/{MODEL_NAME}") \ No newline at end of file diff --git a/.history/src/train_20250926074216.py b/.history/src/train_20250926074216.py new file mode 100644 index 0000000000000000000000000000000000000000..8b5b6c69fba3e71ff61530d3b177e0e9474e09f0 --- /dev/null +++ b/.history/src/train_20250926074216.py @@ -0,0 +1,171 @@ +# src/train.py (Versi dengan Automatic Mixed Precision) + +import torch +import torch.nn as nn +import torch.optim as optim +from tqdm import tqdm +import numpy as np +from sklearn.metrics import classification_report + +# Impor dari file lain dalam proyek +from model import create_model +from dataset import get_dataloaders +from utils import save_model, save_plots, save_confusion_matrix + +# --- 1. KONFIGURASI & HYPERPARAMETERS --- +DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu' +DATA_DIR = 'data' +OUTPUT_DIR = '/outputs/models' +IMAGE_SIZE = 224 +BATCH_SIZE = 16 +EPOCHS = 50 +LEARNING_RATE_HEAD = 1e-3 +LEARNING_RATE_FINETUNE = 3e-5 +WEIGHT_DECAY = 0.01 +MODEL_NAME = 'best_vit_model_amp.pth' +NUM_CLASSES = 4 + +# --- 2. FUNGSI UNTUK MENGATASI CLASS IMBALANCE --- +def calculate_class_weights(dataset): + class_counts = np.bincount(dataset.targets) + class_counts = class_counts[class_counts > 0] # Hanya gunakan kelas yang ada + class_weights = 1. / np.sqrt(class_counts) + return torch.tensor(class_weights, dtype=torch.float).to(DEVICE) + +# --- 3. 
FUNGSI TRAINING & VALIDASI --- +def train_one_epoch(model, dataloader, optimizer, criterion, device, scaler): + model.train() + running_loss = 0.0 + correct_predictions = 0 + total_samples = 0 + + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Training"): + images, labels = images.to(device), labels.to(device) + optimizer.zero_grad() + + # Gunakan autocast HANYA jika di CUDA + if device == 'cuda': + with torch.cuda.amp.autocast(): + outputs = model(images) + loss = criterion(outputs, labels) + + scaler.scale(loss).backward() + scaler.step(optimizer) + scaler.update() + else: # Jika di CPU (atau AMD GPU), jalankan mode normal + outputs = model(images) + loss = criterion(outputs, labels) + loss.backward() + optimizer.step() + + running_loss += loss.item() * images.size(0) + _, preds = torch.max(outputs, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + return epoch_loss, epoch_acc + +def validate_one_epoch(model, dataloader, criterion, device): + model.eval() + running_loss = 0.0 + correct_predictions = 0 + total_samples = 0 + all_preds, all_labels = [], [] + + with torch.no_grad(): + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Validating"): + images, labels = images.to(device), labels.to(device) + outputs = model(images) + loss = criterion(outputs, labels) + + running_loss += loss.item() * images.size(0) + _, preds = torch.max(outputs, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + all_preds.extend(preds.cpu().numpy()) + all_labels.extend(labels.cpu().numpy()) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + + print("\n--- Laporan Klasifikasi Validasi ---") + print(classification_report(all_labels, all_preds, target_names=[str(i) for i in 
range(NUM_CLASSES)], zero_division=0)) + + return epoch_loss, epoch_acc, all_labels, all_preds + +# --- 4. SCRIPT UTAMA --- +if __name__ == '__main__': + train_loader, valid_loader, classes = get_dataloaders(DATA_DIR, BATCH_SIZE, IMAGE_SIZE) + + model = create_model(num_classes=NUM_CLASSES).to(DEVICE) + + class_weights = calculate_class_weights(train_loader.dataset) + print(f"Class weights untuk mengatasi imbalance: {class_weights}") + criterion = nn.CrossEntropyLoss(weight=class_weights) + + # Inisialisasi GradScaler HANYA jika di CUDA + scaler = torch.cuda.amp.GradScaler() if DEVICE == 'cuda' else None + if scaler: + print("Mixed Precision (AMP) diaktifkan.") + + # --- TAHAP 1: FREEZE BACKBONE, LATIH HEAD --- + print("\n--- TAHAP 1: Melatih Classifier Head ---") + for param in model.parameters(): + param.requires_grad = False + for param in model.head.parameters(): + param.requires_grad = True + + optimizer_head = optim.AdamW(model.head.parameters(), lr=LEARNING_RATE_HEAD, weight_decay=WEIGHT_DECAY) + + for epoch in range(5): + print(f"Epoch Head {epoch+1}/5") + # Saat training, oper scaler. Fungsi akan menanganinya. 
+ train_one_epoch(model, train_loader, optimizer_head, criterion, DEVICE, scaler) + validate_one_epoch(model, valid_loader, criterion, DEVICE) + + # --- TAHAP 2: UNFREEZE & FINE-TUNE SELURUH MODEL --- + print("\n--- TAHAP 2: Fine-tuning Seluruh Model ---") + for param in model.parameters(): + param.requires_grad = True + + optimizer_finetune = optim.AdamW(model.parameters(), lr=LEARNING_RATE_FINETUNE, weight_decay=WEIGHT_DECAY) + scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer_finetune, T_max=EPOCHS, eta_min=1e-7) + + history = {'train_loss': [], 'train_acc': [], 'valid_loss': [], 'valid_acc': []} + best_valid_acc = 0.0 + best_labels, best_preds = None, None + + for epoch in range(EPOCHS): + print(f"Epoch {epoch+1}/{EPOCHS}") + + train_loss, train_acc = train_one_epoch(model, train_loader, optimizer_finetune, criterion, DEVICE, scaler) + valid_loss, valid_acc, valid_labels, valid_preds = validate_one_epoch(model, valid_loader, criterion, DEVICE) + + scheduler.step() + + print(f" Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}") + print(f" Valid Loss: {valid_loss:.4f}, Valid Acc: {valid_acc:.4f}") + + history['train_loss'].append(train_loss) + history['train_acc'].append(train_acc) + history['valid_loss'].append(valid_loss) + history['valid_acc'].append(valid_acc) + + if valid_acc > best_valid_acc: + print(f"Validasi akurasi meningkat dari {best_valid_acc:.4f} ke {valid_acc:.4f}. Menyimpan model...") + save_model(epoch, model, optimizer_finetune, criterion, f"{OUTPUT_DIR}/{MODEL_NAME}") + best_valid_acc = valid_acc + best_labels = valid_labels + best_preds = valid_preds + + # --- F. 
EVALUASI & REPORTING --- + save_plots(history['train_acc'], history['valid_acc'], history['train_loss'], history['valid_loss'], OUTPUT_DIR) + + if best_labels and best_preds: + save_confusion_matrix(best_labels, best_preds, classes, f"{OUTPUT_DIR}/confusion_matrix.png") + + print("\n--- Selesai ---") + print(f"Model terbaik disimpan di {OUTPUT_DIR}/{MODEL_NAME}") \ No newline at end of file diff --git a/.history/src/train_20250926074220.py b/.history/src/train_20250926074220.py new file mode 100644 index 0000000000000000000000000000000000000000..7c1837d829e8c55987803d5afb7bbfc3b6d822f2 --- /dev/null +++ b/.history/src/train_20250926074220.py @@ -0,0 +1,171 @@ +# src/train.py (Versi dengan Automatic Mixed Precision) + +import torch +import torch.nn as nn +import torch.optim as optim +from tqdm import tqdm +import numpy as np +from sklearn.metrics import classification_report + +# Impor dari file lain dalam proyek +from model import create_model +from dataset import get_dataloaders +from utils import save_model, save_plots, save_confusion_matrix + +# --- 1. KONFIGURASI & HYPERPARAMETERS --- +DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu' +DATA_DIR = 'data' +OUTPUT_DIR = 'outputs/models' +IMAGE_SIZE = 224 +BATCH_SIZE = 16 +EPOCHS = 50 +LEARNING_RATE_HEAD = 1e-3 +LEARNING_RATE_FINETUNE = 3e-5 +WEIGHT_DECAY = 0.01 +MODEL_NAME = 'best_vit_model_amp.pth' +NUM_CLASSES = 4 + +# --- 2. FUNGSI UNTUK MENGATASI CLASS IMBALANCE --- +def calculate_class_weights(dataset): + class_counts = np.bincount(dataset.targets) + class_counts = class_counts[class_counts > 0] # Hanya gunakan kelas yang ada + class_weights = 1. / np.sqrt(class_counts) + return torch.tensor(class_weights, dtype=torch.float).to(DEVICE) + +# --- 3. 
FUNGSI TRAINING & VALIDASI --- +def train_one_epoch(model, dataloader, optimizer, criterion, device, scaler): + model.train() + running_loss = 0.0 + correct_predictions = 0 + total_samples = 0 + + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Training"): + images, labels = images.to(device), labels.to(device) + optimizer.zero_grad() + + # Gunakan autocast HANYA jika di CUDA + if device == 'cuda': + with torch.cuda.amp.autocast(): + outputs = model(images) + loss = criterion(outputs, labels) + + scaler.scale(loss).backward() + scaler.step(optimizer) + scaler.update() + else: # Jika di CPU (atau AMD GPU), jalankan mode normal + outputs = model(images) + loss = criterion(outputs, labels) + loss.backward() + optimizer.step() + + running_loss += loss.item() * images.size(0) + _, preds = torch.max(outputs, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + return epoch_loss, epoch_acc + +def validate_one_epoch(model, dataloader, criterion, device): + model.eval() + running_loss = 0.0 + correct_predictions = 0 + total_samples = 0 + all_preds, all_labels = [], [] + + with torch.no_grad(): + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Validating"): + images, labels = images.to(device), labels.to(device) + outputs = model(images) + loss = criterion(outputs, labels) + + running_loss += loss.item() * images.size(0) + _, preds = torch.max(outputs, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + all_preds.extend(preds.cpu().numpy()) + all_labels.extend(labels.cpu().numpy()) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + + print("\n--- Laporan Klasifikasi Validasi ---") + print(classification_report(all_labels, all_preds, target_names=[str(i) for i in 
range(NUM_CLASSES)], zero_division=0)) + + return epoch_loss, epoch_acc, all_labels, all_preds + +# --- 4. SCRIPT UTAMA --- +if __name__ == '__main__': + train_loader, valid_loader, classes = get_dataloaders(DATA_DIR, BATCH_SIZE, IMAGE_SIZE) + + model = create_model(num_classes=NUM_CLASSES).to(DEVICE) + + class_weights = calculate_class_weights(train_loader.dataset) + print(f"Class weights untuk mengatasi imbalance: {class_weights}") + criterion = nn.CrossEntropyLoss(weight=class_weights) + + # Inisialisasi GradScaler HANYA jika di CUDA + scaler = torch.cuda.amp.GradScaler() if DEVICE == 'cuda' else None + if scaler: + print("Mixed Precision (AMP) diaktifkan.") + + # --- TAHAP 1: FREEZE BACKBONE, LATIH HEAD --- + print("\n--- TAHAP 1: Melatih Classifier Head ---") + for param in model.parameters(): + param.requires_grad = False + for param in model.head.parameters(): + param.requires_grad = True + + optimizer_head = optim.AdamW(model.head.parameters(), lr=LEARNING_RATE_HEAD, weight_decay=WEIGHT_DECAY) + + for epoch in range(5): + print(f"Epoch Head {epoch+1}/5") + # Saat training, oper scaler. Fungsi akan menanganinya. 
+ train_one_epoch(model, train_loader, optimizer_head, criterion, DEVICE, scaler) + validate_one_epoch(model, valid_loader, criterion, DEVICE) + + # --- TAHAP 2: UNFREEZE & FINE-TUNE SELURUH MODEL --- + print("\n--- TAHAP 2: Fine-tuning Seluruh Model ---") + for param in model.parameters(): + param.requires_grad = True + + optimizer_finetune = optim.AdamW(model.parameters(), lr=LEARNING_RATE_FINETUNE, weight_decay=WEIGHT_DECAY) + scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer_finetune, T_max=EPOCHS, eta_min=1e-7) + + history = {'train_loss': [], 'train_acc': [], 'valid_loss': [], 'valid_acc': []} + best_valid_acc = 0.0 + best_labels, best_preds = None, None + + for epoch in range(EPOCHS): + print(f"Epoch {epoch+1}/{EPOCHS}") + + train_loss, train_acc = train_one_epoch(model, train_loader, optimizer_finetune, criterion, DEVICE, scaler) + valid_loss, valid_acc, valid_labels, valid_preds = validate_one_epoch(model, valid_loader, criterion, DEVICE) + + scheduler.step() + + print(f" Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}") + print(f" Valid Loss: {valid_loss:.4f}, Valid Acc: {valid_acc:.4f}") + + history['train_loss'].append(train_loss) + history['train_acc'].append(train_acc) + history['valid_loss'].append(valid_loss) + history['valid_acc'].append(valid_acc) + + if valid_acc > best_valid_acc: + print(f"Validasi akurasi meningkat dari {best_valid_acc:.4f} ke {valid_acc:.4f}. Menyimpan model...") + save_model(epoch, model, optimizer_finetune, criterion, f"{OUTPUT_DIR}/{MODEL_NAME}") + best_valid_acc = valid_acc + best_labels = valid_labels + best_preds = valid_preds + + # --- F. 
EVALUASI & REPORTING --- + save_plots(history['train_acc'], history['valid_acc'], history['train_loss'], history['valid_loss'], OUTPUT_DIR) + + if best_labels and best_preds: + save_confusion_matrix(best_labels, best_preds, classes, f"{OUTPUT_DIR}/confusion_matrix.png") + + print("\n--- Selesai ---") + print(f"Model terbaik disimpan di {OUTPUT_DIR}/{MODEL_NAME}") \ No newline at end of file diff --git a/.history/src/train_20250926075134.py b/.history/src/train_20250926075134.py new file mode 100644 index 0000000000000000000000000000000000000000..ddcb67b81a954f565a891251bbf5a8646580079b --- /dev/null +++ b/.history/src/train_20250926075134.py @@ -0,0 +1,171 @@ +# src/train.py (Versi dengan Automatic Mixed Precision) + +import torch +import torch.nn as nn +import torch.optim as optim +from tqdm import tqdm +import numpy as np +from sklearn.metrics import classification_report + +# Impor dari file lain dalam proyek +from model import create_model +from dataset import get_dataloaders +from utils import save_model, save_plots, save_confusion_matrix + +# --- 1. KONFIGURASI & HYPERPARAMETERS --- +DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu' +DATA_DIR = 'data' +OUTPUT_DIR = 'outputs/models' +IMAGE_SIZE = 224 +BATCH_SIZE = 16 +EPOCHS = 50 +LEARNING_RATE_HEAD = 1e-3 +LEARNING_RATE_FINETUNE = 5e-5 +WEIGHT_DECAY = 0.01 +MODEL_NAME = 'best_vit_model_amp.pth' +NUM_CLASSES = 4 + +# --- 2. FUNGSI UNTUK MENGATASI CLASS IMBALANCE --- +def calculate_class_weights(dataset): + class_counts = np.bincount(dataset.targets) + class_counts = class_counts[class_counts > 0] # Hanya gunakan kelas yang ada + class_weights = 1. / np.sqrt(class_counts) + return torch.tensor(class_weights, dtype=torch.float).to(DEVICE) + +# --- 3. 
FUNGSI TRAINING & VALIDASI --- +def train_one_epoch(model, dataloader, optimizer, criterion, device, scaler): + model.train() + running_loss = 0.0 + correct_predictions = 0 + total_samples = 0 + + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Training"): + images, labels = images.to(device), labels.to(device) + optimizer.zero_grad() + + # Gunakan autocast HANYA jika di CUDA + if device == 'cuda': + with torch.cuda.amp.autocast(): + outputs = model(images) + loss = criterion(outputs, labels) + + scaler.scale(loss).backward() + scaler.step(optimizer) + scaler.update() + else: # Jika di CPU (atau AMD GPU), jalankan mode normal + outputs = model(images) + loss = criterion(outputs, labels) + loss.backward() + optimizer.step() + + running_loss += loss.item() * images.size(0) + _, preds = torch.max(outputs, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + return epoch_loss, epoch_acc + +def validate_one_epoch(model, dataloader, criterion, device): + model.eval() + running_loss = 0.0 + correct_predictions = 0 + total_samples = 0 + all_preds, all_labels = [], [] + + with torch.no_grad(): + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Validating"): + images, labels = images.to(device), labels.to(device) + outputs = model(images) + loss = criterion(outputs, labels) + + running_loss += loss.item() * images.size(0) + _, preds = torch.max(outputs, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + all_preds.extend(preds.cpu().numpy()) + all_labels.extend(labels.cpu().numpy()) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + + print("\n--- Laporan Klasifikasi Validasi ---") + print(classification_report(all_labels, all_preds, target_names=[str(i) for i in 
range(NUM_CLASSES)], zero_division=0)) + + return epoch_loss, epoch_acc, all_labels, all_preds + +# --- 4. SCRIPT UTAMA --- +if __name__ == '__main__': + train_loader, valid_loader, classes = get_dataloaders(DATA_DIR, BATCH_SIZE, IMAGE_SIZE) + + model = create_model(num_classes=NUM_CLASSES).to(DEVICE) + + class_weights = calculate_class_weights(train_loader.dataset) + print(f"Class weights untuk mengatasi imbalance: {class_weights}") + criterion = nn.CrossEntropyLoss(weight=class_weights) + + # Inisialisasi GradScaler HANYA jika di CUDA + scaler = torch.cuda.amp.GradScaler() if DEVICE == 'cuda' else None + if scaler: + print("Mixed Precision (AMP) diaktifkan.") + + # --- TAHAP 1: FREEZE BACKBONE, LATIH HEAD --- + print("\n--- TAHAP 1: Melatih Classifier Head ---") + for param in model.parameters(): + param.requires_grad = False + for param in model.head.parameters(): + param.requires_grad = True + + optimizer_head = optim.AdamW(model.head.parameters(), lr=LEARNING_RATE_HEAD, weight_decay=WEIGHT_DECAY) + + for epoch in range(5): + print(f"Epoch Head {epoch+1}/5") + # Saat training, oper scaler. Fungsi akan menanganinya. 
+ train_one_epoch(model, train_loader, optimizer_head, criterion, DEVICE, scaler) + validate_one_epoch(model, valid_loader, criterion, DEVICE) + + # --- TAHAP 2: UNFREEZE & FINE-TUNE SELURUH MODEL --- + print("\n--- TAHAP 2: Fine-tuning Seluruh Model ---") + for param in model.parameters(): + param.requires_grad = True + + optimizer_finetune = optim.AdamW(model.parameters(), lr=LEARNING_RATE_FINETUNE, weight_decay=WEIGHT_DECAY) + scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer_finetune, T_max=EPOCHS, eta_min=1e-7) + + history = {'train_loss': [], 'train_acc': [], 'valid_loss': [], 'valid_acc': []} + best_valid_acc = 0.0 + best_labels, best_preds = None, None + + for epoch in range(EPOCHS): + print(f"Epoch {epoch+1}/{EPOCHS}") + + train_loss, train_acc = train_one_epoch(model, train_loader, optimizer_finetune, criterion, DEVICE, scaler) + valid_loss, valid_acc, valid_labels, valid_preds = validate_one_epoch(model, valid_loader, criterion, DEVICE) + + scheduler.step() + + print(f" Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}") + print(f" Valid Loss: {valid_loss:.4f}, Valid Acc: {valid_acc:.4f}") + + history['train_loss'].append(train_loss) + history['train_acc'].append(train_acc) + history['valid_loss'].append(valid_loss) + history['valid_acc'].append(valid_acc) + + if valid_acc > best_valid_acc: + print(f"Validasi akurasi meningkat dari {best_valid_acc:.4f} ke {valid_acc:.4f}. Menyimpan model...") + save_model(epoch, model, optimizer_finetune, criterion, f"{OUTPUT_DIR}/{MODEL_NAME}") + best_valid_acc = valid_acc + best_labels = valid_labels + best_preds = valid_preds + + # --- F. 
EVALUASI & REPORTING --- + save_plots(history['train_acc'], history['valid_acc'], history['train_loss'], history['valid_loss'], OUTPUT_DIR) + + if best_labels and best_preds: + save_confusion_matrix(best_labels, best_preds, classes, f"{OUTPUT_DIR}/confusion_matrix.png") + + print("\n--- Selesai ---") + print(f"Model terbaik disimpan di {OUTPUT_DIR}/{MODEL_NAME}") \ No newline at end of file diff --git a/.history/src/train_20250926075136.py b/.history/src/train_20250926075136.py new file mode 100644 index 0000000000000000000000000000000000000000..156319fd4ae557cc764a23c4bb36626d5276b051 --- /dev/null +++ b/.history/src/train_20250926075136.py @@ -0,0 +1,171 @@ +# src/train.py (Versi dengan Automatic Mixed Precision) + +import torch +import torch.nn as nn +import torch.optim as optim +from tqdm import tqdm +import numpy as np +from sklearn.metrics import classification_report + +# Impor dari file lain dalam proyek +from model import create_model +from dataset import get_dataloaders +from utils import save_model, save_plots, save_confusion_matrix + +# --- 1. KONFIGURASI & HYPERPARAMETERS --- +DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu' +DATA_DIR = 'data' +OUTPUT_DIR = 'outputs/models' +IMAGE_SIZE = 224 +BATCH_SIZE = 16 +EPOCHS = 50 +LEARNING_RATE_HEAD = 3e-3 +LEARNING_RATE_FINETUNE = 5e-5 +WEIGHT_DECAY = 0.01 +MODEL_NAME = 'best_vit_model_amp.pth' +NUM_CLASSES = 4 + +# --- 2. FUNGSI UNTUK MENGATASI CLASS IMBALANCE --- +def calculate_class_weights(dataset): + class_counts = np.bincount(dataset.targets) + class_counts = class_counts[class_counts > 0] # Hanya gunakan kelas yang ada + class_weights = 1. / np.sqrt(class_counts) + return torch.tensor(class_weights, dtype=torch.float).to(DEVICE) + +# --- 3. 
FUNGSI TRAINING & VALIDASI --- +def train_one_epoch(model, dataloader, optimizer, criterion, device, scaler): + model.train() + running_loss = 0.0 + correct_predictions = 0 + total_samples = 0 + + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Training"): + images, labels = images.to(device), labels.to(device) + optimizer.zero_grad() + + # Gunakan autocast HANYA jika di CUDA + if device == 'cuda': + with torch.cuda.amp.autocast(): + outputs = model(images) + loss = criterion(outputs, labels) + + scaler.scale(loss).backward() + scaler.step(optimizer) + scaler.update() + else: # Jika di CPU (atau AMD GPU), jalankan mode normal + outputs = model(images) + loss = criterion(outputs, labels) + loss.backward() + optimizer.step() + + running_loss += loss.item() * images.size(0) + _, preds = torch.max(outputs, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + return epoch_loss, epoch_acc + +def validate_one_epoch(model, dataloader, criterion, device): + model.eval() + running_loss = 0.0 + correct_predictions = 0 + total_samples = 0 + all_preds, all_labels = [], [] + + with torch.no_grad(): + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Validating"): + images, labels = images.to(device), labels.to(device) + outputs = model(images) + loss = criterion(outputs, labels) + + running_loss += loss.item() * images.size(0) + _, preds = torch.max(outputs, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + all_preds.extend(preds.cpu().numpy()) + all_labels.extend(labels.cpu().numpy()) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + + print("\n--- Laporan Klasifikasi Validasi ---") + print(classification_report(all_labels, all_preds, target_names=[str(i) for i in 
range(NUM_CLASSES)], zero_division=0)) + + return epoch_loss, epoch_acc, all_labels, all_preds + +# --- 4. SCRIPT UTAMA --- +if __name__ == '__main__': + train_loader, valid_loader, classes = get_dataloaders(DATA_DIR, BATCH_SIZE, IMAGE_SIZE) + + model = create_model(num_classes=NUM_CLASSES).to(DEVICE) + + class_weights = calculate_class_weights(train_loader.dataset) + print(f"Class weights untuk mengatasi imbalance: {class_weights}") + criterion = nn.CrossEntropyLoss(weight=class_weights) + + # Inisialisasi GradScaler HANYA jika di CUDA + scaler = torch.cuda.amp.GradScaler() if DEVICE == 'cuda' else None + if scaler: + print("Mixed Precision (AMP) diaktifkan.") + + # --- TAHAP 1: FREEZE BACKBONE, LATIH HEAD --- + print("\n--- TAHAP 1: Melatih Classifier Head ---") + for param in model.parameters(): + param.requires_grad = False + for param in model.head.parameters(): + param.requires_grad = True + + optimizer_head = optim.AdamW(model.head.parameters(), lr=LEARNING_RATE_HEAD, weight_decay=WEIGHT_DECAY) + + for epoch in range(5): + print(f"Epoch Head {epoch+1}/5") + # Saat training, oper scaler. Fungsi akan menanganinya. 
+ train_one_epoch(model, train_loader, optimizer_head, criterion, DEVICE, scaler) + validate_one_epoch(model, valid_loader, criterion, DEVICE) + + # --- TAHAP 2: UNFREEZE & FINE-TUNE SELURUH MODEL --- + print("\n--- TAHAP 2: Fine-tuning Seluruh Model ---") + for param in model.parameters(): + param.requires_grad = True + + optimizer_finetune = optim.AdamW(model.parameters(), lr=LEARNING_RATE_FINETUNE, weight_decay=WEIGHT_DECAY) + scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer_finetune, T_max=EPOCHS, eta_min=1e-7) + + history = {'train_loss': [], 'train_acc': [], 'valid_loss': [], 'valid_acc': []} + best_valid_acc = 0.0 + best_labels, best_preds = None, None + + for epoch in range(EPOCHS): + print(f"Epoch {epoch+1}/{EPOCHS}") + + train_loss, train_acc = train_one_epoch(model, train_loader, optimizer_finetune, criterion, DEVICE, scaler) + valid_loss, valid_acc, valid_labels, valid_preds = validate_one_epoch(model, valid_loader, criterion, DEVICE) + + scheduler.step() + + print(f" Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}") + print(f" Valid Loss: {valid_loss:.4f}, Valid Acc: {valid_acc:.4f}") + + history['train_loss'].append(train_loss) + history['train_acc'].append(train_acc) + history['valid_loss'].append(valid_loss) + history['valid_acc'].append(valid_acc) + + if valid_acc > best_valid_acc: + print(f"Validasi akurasi meningkat dari {best_valid_acc:.4f} ke {valid_acc:.4f}. Menyimpan model...") + save_model(epoch, model, optimizer_finetune, criterion, f"{OUTPUT_DIR}/{MODEL_NAME}") + best_valid_acc = valid_acc + best_labels = valid_labels + best_preds = valid_preds + + # --- F. 
EVALUASI & REPORTING --- + save_plots(history['train_acc'], history['valid_acc'], history['train_loss'], history['valid_loss'], OUTPUT_DIR) + + if best_labels and best_preds: + save_confusion_matrix(best_labels, best_preds, classes, f"{OUTPUT_DIR}/confusion_matrix.png") + + print("\n--- Selesai ---") + print(f"Model terbaik disimpan di {OUTPUT_DIR}/{MODEL_NAME}") \ No newline at end of file diff --git a/.history/src/train_20250927095546.py b/.history/src/train_20250927095546.py new file mode 100644 index 0000000000000000000000000000000000000000..7c8d34701ab92c20a84cb9e25c2a317a4ad83462 --- /dev/null +++ b/.history/src/train_20250927095546.py @@ -0,0 +1,173 @@ +# src/train.py (Versi dengan Automatic Mixed Precision) + +import torch +import torch.nn as nn +import torch.optim as optim +from tqdm import tqdm +import numpy as np +from sklearn.metrics import classification_report + +# Impor dari file lain dalam proyek +from model import create_model +from dataset import get_dataloaders +from utils import save_model, save_plots, save_confusion_matrix +from torch.utils.data import WeightedRandomSampler + + +# --- 1. KONFIGURASI & HYPERPARAMETERS --- +DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu' +DATA_DIR = 'data' +OUTPUT_DIR = 'outputs/models' +IMAGE_SIZE = 224 +BATCH_SIZE = 16 +EPOCHS = 50 +LEARNING_RATE_HEAD = 3e-3 +LEARNING_RATE_FINETUNE = 5e-5 +WEIGHT_DECAY = 0.01 +MODEL_NAME = 'best_vit_model_amp.pth' +NUM_CLASSES = 4 + +# --- 2. FUNGSI UNTUK MENGATASI CLASS IMBALANCE --- +def calculate_class_weights(dataset): + class_counts = np.bincount(dataset.targets) + class_counts = class_counts[class_counts > 0] # Hanya gunakan kelas yang ada + class_weights = 1. / np.sqrt(class_counts) + return torch.tensor(class_weights, dtype=torch.float).to(DEVICE) + +# --- 3. 
FUNGSI TRAINING & VALIDASI --- +def train_one_epoch(model, dataloader, optimizer, criterion, device, scaler): + model.train() + running_loss = 0.0 + correct_predictions = 0 + total_samples = 0 + + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Training"): + images, labels = images.to(device), labels.to(device) + optimizer.zero_grad() + + # Gunakan autocast HANYA jika di CUDA + if device == 'cuda': + with torch.cuda.amp.autocast(): + outputs = model(images) + loss = criterion(outputs, labels) + + scaler.scale(loss).backward() + scaler.step(optimizer) + scaler.update() + else: # Jika di CPU (atau AMD GPU), jalankan mode normal + outputs = model(images) + loss = criterion(outputs, labels) + loss.backward() + optimizer.step() + + running_loss += loss.item() * images.size(0) + _, preds = torch.max(outputs, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + return epoch_loss, epoch_acc + +def validate_one_epoch(model, dataloader, criterion, device): + model.eval() + running_loss = 0.0 + correct_predictions = 0 + total_samples = 0 + all_preds, all_labels = [], [] + + with torch.no_grad(): + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Validating"): + images, labels = images.to(device), labels.to(device) + outputs = model(images) + loss = criterion(outputs, labels) + + running_loss += loss.item() * images.size(0) + _, preds = torch.max(outputs, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + all_preds.extend(preds.cpu().numpy()) + all_labels.extend(labels.cpu().numpy()) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + + print("\n--- Laporan Klasifikasi Validasi ---") + print(classification_report(all_labels, all_preds, target_names=[str(i) for i in 
range(NUM_CLASSES)], zero_division=0)) + + return epoch_loss, epoch_acc, all_labels, all_preds + +# --- 4. SCRIPT UTAMA --- +if __name__ == '__main__': + train_loader, valid_loader, classes = get_dataloaders(DATA_DIR, BATCH_SIZE, IMAGE_SIZE) + + model = create_model(num_classes=NUM_CLASSES).to(DEVICE) + + class_weights = calculate_class_weights(train_loader.dataset) + print(f"Class weights untuk mengatasi imbalance: {class_weights}") + criterion = nn.CrossEntropyLoss(weight=class_weights) + + # Inisialisasi GradScaler HANYA jika di CUDA + scaler = torch.cuda.amp.GradScaler() if DEVICE == 'cuda' else None + if scaler: + print("Mixed Precision (AMP) diaktifkan.") + + # --- TAHAP 1: FREEZE BACKBONE, LATIH HEAD --- + print("\n--- TAHAP 1: Melatih Classifier Head ---") + for param in model.parameters(): + param.requires_grad = False + for param in model.head.parameters(): + param.requires_grad = True + + optimizer_head = optim.AdamW(model.head.parameters(), lr=LEARNING_RATE_HEAD, weight_decay=WEIGHT_DECAY) + + for epoch in range(5): + print(f"Epoch Head {epoch+1}/5") + # Saat training, oper scaler. Fungsi akan menanganinya. 
+ train_one_epoch(model, train_loader, optimizer_head, criterion, DEVICE, scaler) + validate_one_epoch(model, valid_loader, criterion, DEVICE) + + # --- TAHAP 2: UNFREEZE & FINE-TUNE SELURUH MODEL --- + print("\n--- TAHAP 2: Fine-tuning Seluruh Model ---") + for param in model.parameters(): + param.requires_grad = True + + optimizer_finetune = optim.AdamW(model.parameters(), lr=LEARNING_RATE_FINETUNE, weight_decay=WEIGHT_DECAY) + scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer_finetune, T_max=EPOCHS, eta_min=1e-7) + + history = {'train_loss': [], 'train_acc': [], 'valid_loss': [], 'valid_acc': []} + best_valid_acc = 0.0 + best_labels, best_preds = None, None + + for epoch in range(EPOCHS): + print(f"Epoch {epoch+1}/{EPOCHS}") + + train_loss, train_acc = train_one_epoch(model, train_loader, optimizer_finetune, criterion, DEVICE, scaler) + valid_loss, valid_acc, valid_labels, valid_preds = validate_one_epoch(model, valid_loader, criterion, DEVICE) + + scheduler.step() + + print(f" Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}") + print(f" Valid Loss: {valid_loss:.4f}, Valid Acc: {valid_acc:.4f}") + + history['train_loss'].append(train_loss) + history['train_acc'].append(train_acc) + history['valid_loss'].append(valid_loss) + history['valid_acc'].append(valid_acc) + + if valid_acc > best_valid_acc: + print(f"Validasi akurasi meningkat dari {best_valid_acc:.4f} ke {valid_acc:.4f}. Menyimpan model...") + save_model(epoch, model, optimizer_finetune, criterion, f"{OUTPUT_DIR}/{MODEL_NAME}") + best_valid_acc = valid_acc + best_labels = valid_labels + best_preds = valid_preds + + # --- F. 
EVALUASI & REPORTING --- + save_plots(history['train_acc'], history['valid_acc'], history['train_loss'], history['valid_loss'], OUTPUT_DIR) + + if best_labels and best_preds: + save_confusion_matrix(best_labels, best_preds, classes, f"{OUTPUT_DIR}/confusion_matrix.png") + + print("\n--- Selesai ---") + print(f"Model terbaik disimpan di {OUTPUT_DIR}/{MODEL_NAME}") \ No newline at end of file diff --git a/.history/src/train_20250927095906.py b/.history/src/train_20250927095906.py new file mode 100644 index 0000000000000000000000000000000000000000..7922d1e7af0f4bf482541ac79663d6967aa1f9cd --- /dev/null +++ b/.history/src/train_20250927095906.py @@ -0,0 +1,173 @@ +# src/train.py (Versi dengan Automatic Mixed Precision) + +import torch +import torch.nn as nn +import torch.optim as optim +from tqdm import tqdm +import numpy as np +from sklearn.metrics import classification_report + +# Impor dari file lain dalam proyek +from model import create_model +from dataset import get_dataloaders +from utils import save_model, save_plots, save_confusion_matrix +from torch.utils.data import WeightedRandomSampler + + +# --- 1. KONFIGURASI & HYPERPARAMETERS --- +DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu' +DATA_DIR = 'data' +OUTPUT_DIR = 'outputs/models' +IMAGE_SIZE = 224 +BATCH_SIZE = 16 +EPOCHS = 50 +LEARNING_RATE_HEAD = 3e-3 +LEARNING_RATE_FINETUNE = 5e-5 +WEIGHT_DECAY = 0.01 +MODEL_NAME = 'best_vit_model_amp.pth' +NUM_CLASSES = 4 + +# --- 2. FUNGSI UNTUK MENGATASI CLASS IMBALANCE --- +def calculate_class_weights(dataset): + class_counts = np.bincount(dataset.targets) + class_counts = class_counts[class_counts > 0] # Hanya gunakan kelas yang ada + class_weights = 1. / np.sqrt(class_counts) + return torch.tensor(class_weights, dtype=torch.float).to(DEVICE) + +# --- 3. 
FUNGSI TRAINING & VALIDASI --- +def train_one_epoch(model, dataloader, optimizer, criterion, device, scaler): + model.train() + running_loss = 0.0 + correct_predictions = 0 + total_samples = 0 + + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Training"): + images, labels = images.to(device), labels.to(device) + optimizer.zero_grad() + + # Gunakan autocast HANYA jika di CUDA + if device == 'cuda': + with torch.cuda.amp.autocast(): + outputs = model(images) + loss = criterion(outputs, labels) + + scaler.scale(loss).backward() + scaler.step(optimizer) + scaler.update() + else: # Jika di CPU (atau AMD GPU), jalankan mode normal + outputs = model(images) + loss = criterion(outputs, labels) + loss.backward() + optimizer.step() + + running_loss += loss.item() * images.size(0) + _, preds = torch.max(outputs, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + return epoch_loss, epoch_acc + +def validate_one_epoch(model, dataloader, criterion, device): + model.eval() + running_loss = 0.0 + correct_predictions = 0 + total_samples = 0 + all_preds, all_labels = [], [] + + with torch.no_grad(): + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Validating"): + images, labels = images.to(device), labels.to(device) + outputs = model(images) + loss = criterion(outputs, labels) + + running_loss += loss.item() * images.size(0) + _, preds = torch.max(outputs, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + all_preds.extend(preds.cpu().numpy()) + all_labels.extend(labels.cpu().numpy()) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + + print("\n--- Laporan Klasifikasi Validasi ---") + print(classification_report(all_labels, all_preds, target_names=[str(i) for i in 
range(NUM_CLASSES)], zero_division=0)) + + return epoch_loss, epoch_acc, all_labels, all_preds + +# --- 4. SCRIPT UTAMA --- +if __name__ == '__main__': + train_loader_unbalanced, valid_loader, classes, train_dataset = get_dataloaders(DATA_DIR, BATCH_SIZE, IMAGE_SIZE) + + model = create_model(num_classes=NUM_CLASSES).to(DEVICE) + + class_weights = calculate_class_weights(train_loader.dataset) + print(f"Class weights untuk mengatasi imbalance: {class_weights}") + criterion = nn.CrossEntropyLoss(weight=class_weights) + + # Inisialisasi GradScaler HANYA jika di CUDA + scaler = torch.cuda.amp.GradScaler() if DEVICE == 'cuda' else None + if scaler: + print("Mixed Precision (AMP) diaktifkan.") + + # --- TAHAP 1: FREEZE BACKBONE, LATIH HEAD --- + print("\n--- TAHAP 1: Melatih Classifier Head ---") + for param in model.parameters(): + param.requires_grad = False + for param in model.head.parameters(): + param.requires_grad = True + + optimizer_head = optim.AdamW(model.head.parameters(), lr=LEARNING_RATE_HEAD, weight_decay=WEIGHT_DECAY) + + for epoch in range(5): + print(f"Epoch Head {epoch+1}/5") + # Saat training, oper scaler. Fungsi akan menanganinya. 
+ train_one_epoch(model, train_loader, optimizer_head, criterion, DEVICE, scaler) + validate_one_epoch(model, valid_loader, criterion, DEVICE) + + # --- TAHAP 2: UNFREEZE & FINE-TUNE SELURUH MODEL --- + print("\n--- TAHAP 2: Fine-tuning Seluruh Model ---") + for param in model.parameters(): + param.requires_grad = True + + optimizer_finetune = optim.AdamW(model.parameters(), lr=LEARNING_RATE_FINETUNE, weight_decay=WEIGHT_DECAY) + scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer_finetune, T_max=EPOCHS, eta_min=1e-7) + + history = {'train_loss': [], 'train_acc': [], 'valid_loss': [], 'valid_acc': []} + best_valid_acc = 0.0 + best_labels, best_preds = None, None + + for epoch in range(EPOCHS): + print(f"Epoch {epoch+1}/{EPOCHS}") + + train_loss, train_acc = train_one_epoch(model, train_loader, optimizer_finetune, criterion, DEVICE, scaler) + valid_loss, valid_acc, valid_labels, valid_preds = validate_one_epoch(model, valid_loader, criterion, DEVICE) + + scheduler.step() + + print(f" Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}") + print(f" Valid Loss: {valid_loss:.4f}, Valid Acc: {valid_acc:.4f}") + + history['train_loss'].append(train_loss) + history['train_acc'].append(train_acc) + history['valid_loss'].append(valid_loss) + history['valid_acc'].append(valid_acc) + + if valid_acc > best_valid_acc: + print(f"Validasi akurasi meningkat dari {best_valid_acc:.4f} ke {valid_acc:.4f}. Menyimpan model...") + save_model(epoch, model, optimizer_finetune, criterion, f"{OUTPUT_DIR}/{MODEL_NAME}") + best_valid_acc = valid_acc + best_labels = valid_labels + best_preds = valid_preds + + # --- F. 
EVALUASI & REPORTING --- + save_plots(history['train_acc'], history['valid_acc'], history['train_loss'], history['valid_loss'], OUTPUT_DIR) + + if best_labels and best_preds: + save_confusion_matrix(best_labels, best_preds, classes, f"{OUTPUT_DIR}/confusion_matrix.png") + + print("\n--- Selesai ---") + print(f"Model terbaik disimpan di {OUTPUT_DIR}/{MODEL_NAME}") \ No newline at end of file diff --git a/.history/src/train_20250927100013.py b/.history/src/train_20250927100013.py new file mode 100644 index 0000000000000000000000000000000000000000..70f2541ebb687885976954b26de1273ac33ce737 --- /dev/null +++ b/.history/src/train_20250927100013.py @@ -0,0 +1,176 @@ +# src/train.py (Versi dengan Automatic Mixed Precision) + +import torch +import torch.nn as nn +import torch.optim as optim +from tqdm import tqdm +import numpy as np +from sklearn.metrics import classification_report + +# Impor dari file lain dalam proyek +from model import create_model +from dataset import get_dataloaders +from utils import save_model, save_plots, save_confusion_matrix +from torch.utils.data import WeightedRandomSampler + + +# --- 1. KONFIGURASI & HYPERPARAMETERS --- +DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu' +DATA_DIR = 'data' +OUTPUT_DIR = 'outputs/models' +IMAGE_SIZE = 224 +BATCH_SIZE = 16 +EPOCHS = 50 +LEARNING_RATE_HEAD = 3e-3 +LEARNING_RATE_FINETUNE = 5e-5 +WEIGHT_DECAY = 0.01 +MODEL_NAME = 'best_vit_model_amp.pth' +NUM_CLASSES = 4 + +# --- 2. FUNGSI UNTUK MENGATASI CLASS IMBALANCE --- +def calculate_class_weights(dataset): + class_counts = np.bincount(dataset.targets) + class_counts = class_counts[class_counts > 0] # Hanya gunakan kelas yang ada + class_weights = 1. / np.sqrt(class_counts) + return torch.tensor(class_weights, dtype=torch.float).to(DEVICE) + +# --- 3. 
FUNGSI TRAINING & VALIDASI --- +def train_one_epoch(model, dataloader, optimizer, criterion, device, scaler): + model.train() + running_loss = 0.0 + correct_predictions = 0 + total_samples = 0 + + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Training"): + images, labels = images.to(device), labels.to(device) + optimizer.zero_grad() + + # Gunakan autocast HANYA jika di CUDA + if device == 'cuda': + with torch.cuda.amp.autocast(): + outputs = model(images) + loss = criterion(outputs, labels) + + scaler.scale(loss).backward() + scaler.step(optimizer) + scaler.update() + else: # Jika di CPU (atau AMD GPU), jalankan mode normal + outputs = model(images) + loss = criterion(outputs, labels) + loss.backward() + optimizer.step() + + running_loss += loss.item() * images.size(0) + _, preds = torch.max(outputs, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + return epoch_loss, epoch_acc + +def validate_one_epoch(model, dataloader, criterion, device): + model.eval() + running_loss = 0.0 + correct_predictions = 0 + total_samples = 0 + all_preds, all_labels = [], [] + + with torch.no_grad(): + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Validating"): + images, labels = images.to(device), labels.to(device) + outputs = model(images) + loss = criterion(outputs, labels) + + running_loss += loss.item() * images.size(0) + _, preds = torch.max(outputs, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + all_preds.extend(preds.cpu().numpy()) + all_labels.extend(labels.cpu().numpy()) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + + print("\n--- Laporan Klasifikasi Validasi ---") + print(classification_report(all_labels, all_preds, target_names=[str(i) for i in 
range(NUM_CLASSES)], zero_division=0)) + + return epoch_loss, epoch_acc, all_labels, all_preds + +# --- 4. SCRIPT UTAMA --- +if __name__ == '__main__': + train_loader_unbalanced, valid_loader, classes, train_dataset = get_dataloaders(DATA_DIR, BATCH_SIZE, IMAGE_SIZE) + + model = create_model(num_classes=NUM_CLASSES).to(DEVICE) + # Hitung bobot untuk setiap sampel +class_counts = np.bincount(train_dataset.targets) +class_weights = 1. / torch.tensor(class_counts, dtype=torch.float) +sample_weights = class_weights[train_dataset.targets] + class_weights = calculate_class_weights(train_loader.dataset) + print(f"Class weights untuk mengatasi imbalance: {class_weights}") + criterion = nn.CrossEntropyLoss(weight=class_weights) + + # Inisialisasi GradScaler HANYA jika di CUDA + scaler = torch.cuda.amp.GradScaler() if DEVICE == 'cuda' else None + if scaler: + print("Mixed Precision (AMP) diaktifkan.") + + # --- TAHAP 1: FREEZE BACKBONE, LATIH HEAD --- + print("\n--- TAHAP 1: Melatih Classifier Head ---") + for param in model.parameters(): + param.requires_grad = False + for param in model.head.parameters(): + param.requires_grad = True + + optimizer_head = optim.AdamW(model.head.parameters(), lr=LEARNING_RATE_HEAD, weight_decay=WEIGHT_DECAY) + + for epoch in range(5): + print(f"Epoch Head {epoch+1}/5") + # Saat training, oper scaler. Fungsi akan menanganinya. 
+ train_one_epoch(model, train_loader, optimizer_head, criterion, DEVICE, scaler) + validate_one_epoch(model, valid_loader, criterion, DEVICE) + + # --- TAHAP 2: UNFREEZE & FINE-TUNE SELURUH MODEL --- + print("\n--- TAHAP 2: Fine-tuning Seluruh Model ---") + for param in model.parameters(): + param.requires_grad = True + + optimizer_finetune = optim.AdamW(model.parameters(), lr=LEARNING_RATE_FINETUNE, weight_decay=WEIGHT_DECAY) + scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer_finetune, T_max=EPOCHS, eta_min=1e-7) + + history = {'train_loss': [], 'train_acc': [], 'valid_loss': [], 'valid_acc': []} + best_valid_acc = 0.0 + best_labels, best_preds = None, None + + for epoch in range(EPOCHS): + print(f"Epoch {epoch+1}/{EPOCHS}") + + train_loss, train_acc = train_one_epoch(model, train_loader, optimizer_finetune, criterion, DEVICE, scaler) + valid_loss, valid_acc, valid_labels, valid_preds = validate_one_epoch(model, valid_loader, criterion, DEVICE) + + scheduler.step() + + print(f" Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}") + print(f" Valid Loss: {valid_loss:.4f}, Valid Acc: {valid_acc:.4f}") + + history['train_loss'].append(train_loss) + history['train_acc'].append(train_acc) + history['valid_loss'].append(valid_loss) + history['valid_acc'].append(valid_acc) + + if valid_acc > best_valid_acc: + print(f"Validasi akurasi meningkat dari {best_valid_acc:.4f} ke {valid_acc:.4f}. Menyimpan model...") + save_model(epoch, model, optimizer_finetune, criterion, f"{OUTPUT_DIR}/{MODEL_NAME}") + best_valid_acc = valid_acc + best_labels = valid_labels + best_preds = valid_preds + + # --- F. 
EVALUASI & REPORTING --- + save_plots(history['train_acc'], history['valid_acc'], history['train_loss'], history['valid_loss'], OUTPUT_DIR) + + if best_labels and best_preds: + save_confusion_matrix(best_labels, best_preds, classes, f"{OUTPUT_DIR}/confusion_matrix.png") + + print("\n--- Selesai ---") + print(f"Model terbaik disimpan di {OUTPUT_DIR}/{MODEL_NAME}") \ No newline at end of file diff --git a/.history/src/train_20250927100016.py b/.history/src/train_20250927100016.py new file mode 100644 index 0000000000000000000000000000000000000000..7922d1e7af0f4bf482541ac79663d6967aa1f9cd --- /dev/null +++ b/.history/src/train_20250927100016.py @@ -0,0 +1,173 @@ +# src/train.py (Versi dengan Automatic Mixed Precision) + +import torch +import torch.nn as nn +import torch.optim as optim +from tqdm import tqdm +import numpy as np +from sklearn.metrics import classification_report + +# Impor dari file lain dalam proyek +from model import create_model +from dataset import get_dataloaders +from utils import save_model, save_plots, save_confusion_matrix +from torch.utils.data import WeightedRandomSampler + + +# --- 1. KONFIGURASI & HYPERPARAMETERS --- +DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu' +DATA_DIR = 'data' +OUTPUT_DIR = 'outputs/models' +IMAGE_SIZE = 224 +BATCH_SIZE = 16 +EPOCHS = 50 +LEARNING_RATE_HEAD = 3e-3 +LEARNING_RATE_FINETUNE = 5e-5 +WEIGHT_DECAY = 0.01 +MODEL_NAME = 'best_vit_model_amp.pth' +NUM_CLASSES = 4 + +# --- 2. FUNGSI UNTUK MENGATASI CLASS IMBALANCE --- +def calculate_class_weights(dataset): + class_counts = np.bincount(dataset.targets) + class_counts = class_counts[class_counts > 0] # Hanya gunakan kelas yang ada + class_weights = 1. / np.sqrt(class_counts) + return torch.tensor(class_weights, dtype=torch.float).to(DEVICE) + +# --- 3. 
FUNGSI TRAINING & VALIDASI --- +def train_one_epoch(model, dataloader, optimizer, criterion, device, scaler): + model.train() + running_loss = 0.0 + correct_predictions = 0 + total_samples = 0 + + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Training"): + images, labels = images.to(device), labels.to(device) + optimizer.zero_grad() + + # Gunakan autocast HANYA jika di CUDA + if device == 'cuda': + with torch.cuda.amp.autocast(): + outputs = model(images) + loss = criterion(outputs, labels) + + scaler.scale(loss).backward() + scaler.step(optimizer) + scaler.update() + else: # Jika di CPU (atau AMD GPU), jalankan mode normal + outputs = model(images) + loss = criterion(outputs, labels) + loss.backward() + optimizer.step() + + running_loss += loss.item() * images.size(0) + _, preds = torch.max(outputs, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + return epoch_loss, epoch_acc + +def validate_one_epoch(model, dataloader, criterion, device): + model.eval() + running_loss = 0.0 + correct_predictions = 0 + total_samples = 0 + all_preds, all_labels = [], [] + + with torch.no_grad(): + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Validating"): + images, labels = images.to(device), labels.to(device) + outputs = model(images) + loss = criterion(outputs, labels) + + running_loss += loss.item() * images.size(0) + _, preds = torch.max(outputs, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + all_preds.extend(preds.cpu().numpy()) + all_labels.extend(labels.cpu().numpy()) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + + print("\n--- Laporan Klasifikasi Validasi ---") + print(classification_report(all_labels, all_preds, target_names=[str(i) for i in 
range(NUM_CLASSES)], zero_division=0)) + + return epoch_loss, epoch_acc, all_labels, all_preds + +# --- 4. SCRIPT UTAMA --- +if __name__ == '__main__': + train_loader_unbalanced, valid_loader, classes, train_dataset = get_dataloaders(DATA_DIR, BATCH_SIZE, IMAGE_SIZE) + + model = create_model(num_classes=NUM_CLASSES).to(DEVICE) + + class_weights = calculate_class_weights(train_loader.dataset) + print(f"Class weights untuk mengatasi imbalance: {class_weights}") + criterion = nn.CrossEntropyLoss(weight=class_weights) + + # Inisialisasi GradScaler HANYA jika di CUDA + scaler = torch.cuda.amp.GradScaler() if DEVICE == 'cuda' else None + if scaler: + print("Mixed Precision (AMP) diaktifkan.") + + # --- TAHAP 1: FREEZE BACKBONE, LATIH HEAD --- + print("\n--- TAHAP 1: Melatih Classifier Head ---") + for param in model.parameters(): + param.requires_grad = False + for param in model.head.parameters(): + param.requires_grad = True + + optimizer_head = optim.AdamW(model.head.parameters(), lr=LEARNING_RATE_HEAD, weight_decay=WEIGHT_DECAY) + + for epoch in range(5): + print(f"Epoch Head {epoch+1}/5") + # Saat training, oper scaler. Fungsi akan menanganinya. 
+ train_one_epoch(model, train_loader, optimizer_head, criterion, DEVICE, scaler) + validate_one_epoch(model, valid_loader, criterion, DEVICE) + + # --- TAHAP 2: UNFREEZE & FINE-TUNE SELURUH MODEL --- + print("\n--- TAHAP 2: Fine-tuning Seluruh Model ---") + for param in model.parameters(): + param.requires_grad = True + + optimizer_finetune = optim.AdamW(model.parameters(), lr=LEARNING_RATE_FINETUNE, weight_decay=WEIGHT_DECAY) + scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer_finetune, T_max=EPOCHS, eta_min=1e-7) + + history = {'train_loss': [], 'train_acc': [], 'valid_loss': [], 'valid_acc': []} + best_valid_acc = 0.0 + best_labels, best_preds = None, None + + for epoch in range(EPOCHS): + print(f"Epoch {epoch+1}/{EPOCHS}") + + train_loss, train_acc = train_one_epoch(model, train_loader, optimizer_finetune, criterion, DEVICE, scaler) + valid_loss, valid_acc, valid_labels, valid_preds = validate_one_epoch(model, valid_loader, criterion, DEVICE) + + scheduler.step() + + print(f" Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}") + print(f" Valid Loss: {valid_loss:.4f}, Valid Acc: {valid_acc:.4f}") + + history['train_loss'].append(train_loss) + history['train_acc'].append(train_acc) + history['valid_loss'].append(valid_loss) + history['valid_acc'].append(valid_acc) + + if valid_acc > best_valid_acc: + print(f"Validasi akurasi meningkat dari {best_valid_acc:.4f} ke {valid_acc:.4f}. Menyimpan model...") + save_model(epoch, model, optimizer_finetune, criterion, f"{OUTPUT_DIR}/{MODEL_NAME}") + best_valid_acc = valid_acc + best_labels = valid_labels + best_preds = valid_preds + + # --- F. 
EVALUASI & REPORTING --- + save_plots(history['train_acc'], history['valid_acc'], history['train_loss'], history['valid_loss'], OUTPUT_DIR) + + if best_labels and best_preds: + save_confusion_matrix(best_labels, best_preds, classes, f"{OUTPUT_DIR}/confusion_matrix.png") + + print("\n--- Selesai ---") + print(f"Model terbaik disimpan di {OUTPUT_DIR}/{MODEL_NAME}") \ No newline at end of file diff --git a/.history/src/train_20250927100023.py b/.history/src/train_20250927100023.py new file mode 100644 index 0000000000000000000000000000000000000000..70f2541ebb687885976954b26de1273ac33ce737 --- /dev/null +++ b/.history/src/train_20250927100023.py @@ -0,0 +1,176 @@ +# src/train.py (Versi dengan Automatic Mixed Precision) + +import torch +import torch.nn as nn +import torch.optim as optim +from tqdm import tqdm +import numpy as np +from sklearn.metrics import classification_report + +# Impor dari file lain dalam proyek +from model import create_model +from dataset import get_dataloaders +from utils import save_model, save_plots, save_confusion_matrix +from torch.utils.data import WeightedRandomSampler + + +# --- 1. KONFIGURASI & HYPERPARAMETERS --- +DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu' +DATA_DIR = 'data' +OUTPUT_DIR = 'outputs/models' +IMAGE_SIZE = 224 +BATCH_SIZE = 16 +EPOCHS = 50 +LEARNING_RATE_HEAD = 3e-3 +LEARNING_RATE_FINETUNE = 5e-5 +WEIGHT_DECAY = 0.01 +MODEL_NAME = 'best_vit_model_amp.pth' +NUM_CLASSES = 4 + +# --- 2. FUNGSI UNTUK MENGATASI CLASS IMBALANCE --- +def calculate_class_weights(dataset): + class_counts = np.bincount(dataset.targets) + class_counts = class_counts[class_counts > 0] # Hanya gunakan kelas yang ada + class_weights = 1. / np.sqrt(class_counts) + return torch.tensor(class_weights, dtype=torch.float).to(DEVICE) + +# --- 3. 
FUNGSI TRAINING & VALIDASI --- +def train_one_epoch(model, dataloader, optimizer, criterion, device, scaler): + model.train() + running_loss = 0.0 + correct_predictions = 0 + total_samples = 0 + + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Training"): + images, labels = images.to(device), labels.to(device) + optimizer.zero_grad() + + # Gunakan autocast HANYA jika di CUDA + if device == 'cuda': + with torch.cuda.amp.autocast(): + outputs = model(images) + loss = criterion(outputs, labels) + + scaler.scale(loss).backward() + scaler.step(optimizer) + scaler.update() + else: # Jika di CPU (atau AMD GPU), jalankan mode normal + outputs = model(images) + loss = criterion(outputs, labels) + loss.backward() + optimizer.step() + + running_loss += loss.item() * images.size(0) + _, preds = torch.max(outputs, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + return epoch_loss, epoch_acc + +def validate_one_epoch(model, dataloader, criterion, device): + model.eval() + running_loss = 0.0 + correct_predictions = 0 + total_samples = 0 + all_preds, all_labels = [], [] + + with torch.no_grad(): + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Validating"): + images, labels = images.to(device), labels.to(device) + outputs = model(images) + loss = criterion(outputs, labels) + + running_loss += loss.item() * images.size(0) + _, preds = torch.max(outputs, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + all_preds.extend(preds.cpu().numpy()) + all_labels.extend(labels.cpu().numpy()) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + + print("\n--- Laporan Klasifikasi Validasi ---") + print(classification_report(all_labels, all_preds, target_names=[str(i) for i in 
range(NUM_CLASSES)], zero_division=0)) + + return epoch_loss, epoch_acc, all_labels, all_preds + +# --- 4. SCRIPT UTAMA --- +if __name__ == '__main__': + train_loader_unbalanced, valid_loader, classes, train_dataset = get_dataloaders(DATA_DIR, BATCH_SIZE, IMAGE_SIZE) + + model = create_model(num_classes=NUM_CLASSES).to(DEVICE) + # Hitung bobot untuk setiap sampel +class_counts = np.bincount(train_dataset.targets) +class_weights = 1. / torch.tensor(class_counts, dtype=torch.float) +sample_weights = class_weights[train_dataset.targets] + class_weights = calculate_class_weights(train_loader.dataset) + print(f"Class weights untuk mengatasi imbalance: {class_weights}") + criterion = nn.CrossEntropyLoss(weight=class_weights) + + # Inisialisasi GradScaler HANYA jika di CUDA + scaler = torch.cuda.amp.GradScaler() if DEVICE == 'cuda' else None + if scaler: + print("Mixed Precision (AMP) diaktifkan.") + + # --- TAHAP 1: FREEZE BACKBONE, LATIH HEAD --- + print("\n--- TAHAP 1: Melatih Classifier Head ---") + for param in model.parameters(): + param.requires_grad = False + for param in model.head.parameters(): + param.requires_grad = True + + optimizer_head = optim.AdamW(model.head.parameters(), lr=LEARNING_RATE_HEAD, weight_decay=WEIGHT_DECAY) + + for epoch in range(5): + print(f"Epoch Head {epoch+1}/5") + # Saat training, oper scaler. Fungsi akan menanganinya. 
+ train_one_epoch(model, train_loader, optimizer_head, criterion, DEVICE, scaler) + validate_one_epoch(model, valid_loader, criterion, DEVICE) + + # --- TAHAP 2: UNFREEZE & FINE-TUNE SELURUH MODEL --- + print("\n--- TAHAP 2: Fine-tuning Seluruh Model ---") + for param in model.parameters(): + param.requires_grad = True + + optimizer_finetune = optim.AdamW(model.parameters(), lr=LEARNING_RATE_FINETUNE, weight_decay=WEIGHT_DECAY) + scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer_finetune, T_max=EPOCHS, eta_min=1e-7) + + history = {'train_loss': [], 'train_acc': [], 'valid_loss': [], 'valid_acc': []} + best_valid_acc = 0.0 + best_labels, best_preds = None, None + + for epoch in range(EPOCHS): + print(f"Epoch {epoch+1}/{EPOCHS}") + + train_loss, train_acc = train_one_epoch(model, train_loader, optimizer_finetune, criterion, DEVICE, scaler) + valid_loss, valid_acc, valid_labels, valid_preds = validate_one_epoch(model, valid_loader, criterion, DEVICE) + + scheduler.step() + + print(f" Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}") + print(f" Valid Loss: {valid_loss:.4f}, Valid Acc: {valid_acc:.4f}") + + history['train_loss'].append(train_loss) + history['train_acc'].append(train_acc) + history['valid_loss'].append(valid_loss) + history['valid_acc'].append(valid_acc) + + if valid_acc > best_valid_acc: + print(f"Validasi akurasi meningkat dari {best_valid_acc:.4f} ke {valid_acc:.4f}. Menyimpan model...") + save_model(epoch, model, optimizer_finetune, criterion, f"{OUTPUT_DIR}/{MODEL_NAME}") + best_valid_acc = valid_acc + best_labels = valid_labels + best_preds = valid_preds + + # --- F. 
EVALUASI & REPORTING --- + save_plots(history['train_acc'], history['valid_acc'], history['train_loss'], history['valid_loss'], OUTPUT_DIR) + + if best_labels and best_preds: + save_confusion_matrix(best_labels, best_preds, classes, f"{OUTPUT_DIR}/confusion_matrix.png") + + print("\n--- Selesai ---") + print(f"Model terbaik disimpan di {OUTPUT_DIR}/{MODEL_NAME}") \ No newline at end of file diff --git a/.history/src/train_20250927100028.py b/.history/src/train_20250927100028.py new file mode 100644 index 0000000000000000000000000000000000000000..42217d5f6a224e6847f851f5b4aaf7dea207c094 --- /dev/null +++ b/.history/src/train_20250927100028.py @@ -0,0 +1,176 @@ +# src/train.py (Versi dengan Automatic Mixed Precision) + +import torch +import torch.nn as nn +import torch.optim as optim +from tqdm import tqdm +import numpy as np +from sklearn.metrics import classification_report + +# Impor dari file lain dalam proyek +from model import create_model +from dataset import get_dataloaders +from utils import save_model, save_plots, save_confusion_matrix +from torch.utils.data import WeightedRandomSampler + + +# --- 1. KONFIGURASI & HYPERPARAMETERS --- +DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu' +DATA_DIR = 'data' +OUTPUT_DIR = 'outputs/models' +IMAGE_SIZE = 224 +BATCH_SIZE = 16 +EPOCHS = 50 +LEARNING_RATE_HEAD = 3e-3 +LEARNING_RATE_FINETUNE = 5e-5 +WEIGHT_DECAY = 0.01 +MODEL_NAME = 'best_vit_model_amp.pth' +NUM_CLASSES = 4 + +# --- 2. FUNGSI UNTUK MENGATASI CLASS IMBALANCE --- +def calculate_class_weights(dataset): + class_counts = np.bincount(dataset.targets) + class_counts = class_counts[class_counts > 0] # Hanya gunakan kelas yang ada + class_weights = 1. / np.sqrt(class_counts) + return torch.tensor(class_weights, dtype=torch.float).to(DEVICE) + +# --- 3. 
FUNGSI TRAINING & VALIDASI --- +def train_one_epoch(model, dataloader, optimizer, criterion, device, scaler): + model.train() + running_loss = 0.0 + correct_predictions = 0 + total_samples = 0 + + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Training"): + images, labels = images.to(device), labels.to(device) + optimizer.zero_grad() + + # Gunakan autocast HANYA jika di CUDA + if device == 'cuda': + with torch.cuda.amp.autocast(): + outputs = model(images) + loss = criterion(outputs, labels) + + scaler.scale(loss).backward() + scaler.step(optimizer) + scaler.update() + else: # Jika di CPU (atau AMD GPU), jalankan mode normal + outputs = model(images) + loss = criterion(outputs, labels) + loss.backward() + optimizer.step() + + running_loss += loss.item() * images.size(0) + _, preds = torch.max(outputs, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + return epoch_loss, epoch_acc + +def validate_one_epoch(model, dataloader, criterion, device): + model.eval() + running_loss = 0.0 + correct_predictions = 0 + total_samples = 0 + all_preds, all_labels = [], [] + + with torch.no_grad(): + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Validating"): + images, labels = images.to(device), labels.to(device) + outputs = model(images) + loss = criterion(outputs, labels) + + running_loss += loss.item() * images.size(0) + _, preds = torch.max(outputs, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + all_preds.extend(preds.cpu().numpy()) + all_labels.extend(labels.cpu().numpy()) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + + print("\n--- Laporan Klasifikasi Validasi ---") + print(classification_report(all_labels, all_preds, target_names=[str(i) for i in 
range(NUM_CLASSES)], zero_division=0)) + + return epoch_loss, epoch_acc, all_labels, all_preds + +# --- 4. SCRIPT UTAMA --- +if __name__ == '__main__': + train_loader_unbalanced, valid_loader, classes, train_dataset = get_dataloaders(DATA_DIR, BATCH_SIZE, IMAGE_SIZE) + + model = create_model(num_classes=NUM_CLASSES).to(DEVICE) + # Hitung bobot untuk setiap sampel + class_counts = np.bincount(train_dataset.targets) +class_weights = 1. / torch.tensor(class_counts, dtype=torch.float) +sample_weights = class_weights[train_dataset.targets] + class_weights = calculate_class_weights(train_loader.dataset) + print(f"Class weights untuk mengatasi imbalance: {class_weights}") + criterion = nn.CrossEntropyLoss(weight=class_weights) + + # Inisialisasi GradScaler HANYA jika di CUDA + scaler = torch.cuda.amp.GradScaler() if DEVICE == 'cuda' else None + if scaler: + print("Mixed Precision (AMP) diaktifkan.") + + # --- TAHAP 1: FREEZE BACKBONE, LATIH HEAD --- + print("\n--- TAHAP 1: Melatih Classifier Head ---") + for param in model.parameters(): + param.requires_grad = False + for param in model.head.parameters(): + param.requires_grad = True + + optimizer_head = optim.AdamW(model.head.parameters(), lr=LEARNING_RATE_HEAD, weight_decay=WEIGHT_DECAY) + + for epoch in range(5): + print(f"Epoch Head {epoch+1}/5") + # Saat training, oper scaler. Fungsi akan menanganinya. 
+ train_one_epoch(model, train_loader, optimizer_head, criterion, DEVICE, scaler) + validate_one_epoch(model, valid_loader, criterion, DEVICE) + + # --- TAHAP 2: UNFREEZE & FINE-TUNE SELURUH MODEL --- + print("\n--- TAHAP 2: Fine-tuning Seluruh Model ---") + for param in model.parameters(): + param.requires_grad = True + + optimizer_finetune = optim.AdamW(model.parameters(), lr=LEARNING_RATE_FINETUNE, weight_decay=WEIGHT_DECAY) + scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer_finetune, T_max=EPOCHS, eta_min=1e-7) + + history = {'train_loss': [], 'train_acc': [], 'valid_loss': [], 'valid_acc': []} + best_valid_acc = 0.0 + best_labels, best_preds = None, None + + for epoch in range(EPOCHS): + print(f"Epoch {epoch+1}/{EPOCHS}") + + train_loss, train_acc = train_one_epoch(model, train_loader, optimizer_finetune, criterion, DEVICE, scaler) + valid_loss, valid_acc, valid_labels, valid_preds = validate_one_epoch(model, valid_loader, criterion, DEVICE) + + scheduler.step() + + print(f" Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}") + print(f" Valid Loss: {valid_loss:.4f}, Valid Acc: {valid_acc:.4f}") + + history['train_loss'].append(train_loss) + history['train_acc'].append(train_acc) + history['valid_loss'].append(valid_loss) + history['valid_acc'].append(valid_acc) + + if valid_acc > best_valid_acc: + print(f"Validasi akurasi meningkat dari {best_valid_acc:.4f} ke {valid_acc:.4f}. Menyimpan model...") + save_model(epoch, model, optimizer_finetune, criterion, f"{OUTPUT_DIR}/{MODEL_NAME}") + best_valid_acc = valid_acc + best_labels = valid_labels + best_preds = valid_preds + + # --- F. 
EVALUASI & REPORTING --- + save_plots(history['train_acc'], history['valid_acc'], history['train_loss'], history['valid_loss'], OUTPUT_DIR) + + if best_labels and best_preds: + save_confusion_matrix(best_labels, best_preds, classes, f"{OUTPUT_DIR}/confusion_matrix.png") + + print("\n--- Selesai ---") + print(f"Model terbaik disimpan di {OUTPUT_DIR}/{MODEL_NAME}") \ No newline at end of file diff --git a/.history/src/train_20250927100029.py b/.history/src/train_20250927100029.py new file mode 100644 index 0000000000000000000000000000000000000000..cf3edd77d34418ed1dbd2f8ff7de3aa34722b479 --- /dev/null +++ b/.history/src/train_20250927100029.py @@ -0,0 +1,176 @@ +# src/train.py (Versi dengan Automatic Mixed Precision) + +import torch +import torch.nn as nn +import torch.optim as optim +from tqdm import tqdm +import numpy as np +from sklearn.metrics import classification_report + +# Impor dari file lain dalam proyek +from model import create_model +from dataset import get_dataloaders +from utils import save_model, save_plots, save_confusion_matrix +from torch.utils.data import WeightedRandomSampler + + +# --- 1. KONFIGURASI & HYPERPARAMETERS --- +DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu' +DATA_DIR = 'data' +OUTPUT_DIR = 'outputs/models' +IMAGE_SIZE = 224 +BATCH_SIZE = 16 +EPOCHS = 50 +LEARNING_RATE_HEAD = 3e-3 +LEARNING_RATE_FINETUNE = 5e-5 +WEIGHT_DECAY = 0.01 +MODEL_NAME = 'best_vit_model_amp.pth' +NUM_CLASSES = 4 + +# --- 2. FUNGSI UNTUK MENGATASI CLASS IMBALANCE --- +def calculate_class_weights(dataset): + class_counts = np.bincount(dataset.targets) + class_counts = class_counts[class_counts > 0] # Hanya gunakan kelas yang ada + class_weights = 1. / np.sqrt(class_counts) + return torch.tensor(class_weights, dtype=torch.float).to(DEVICE) + +# --- 3. 
FUNGSI TRAINING & VALIDASI --- +def train_one_epoch(model, dataloader, optimizer, criterion, device, scaler): + model.train() + running_loss = 0.0 + correct_predictions = 0 + total_samples = 0 + + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Training"): + images, labels = images.to(device), labels.to(device) + optimizer.zero_grad() + + # Gunakan autocast HANYA jika di CUDA + if device == 'cuda': + with torch.cuda.amp.autocast(): + outputs = model(images) + loss = criterion(outputs, labels) + + scaler.scale(loss).backward() + scaler.step(optimizer) + scaler.update() + else: # Jika di CPU (atau AMD GPU), jalankan mode normal + outputs = model(images) + loss = criterion(outputs, labels) + loss.backward() + optimizer.step() + + running_loss += loss.item() * images.size(0) + _, preds = torch.max(outputs, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + return epoch_loss, epoch_acc + +def validate_one_epoch(model, dataloader, criterion, device): + model.eval() + running_loss = 0.0 + correct_predictions = 0 + total_samples = 0 + all_preds, all_labels = [], [] + + with torch.no_grad(): + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Validating"): + images, labels = images.to(device), labels.to(device) + outputs = model(images) + loss = criterion(outputs, labels) + + running_loss += loss.item() * images.size(0) + _, preds = torch.max(outputs, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + all_preds.extend(preds.cpu().numpy()) + all_labels.extend(labels.cpu().numpy()) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + + print("\n--- Laporan Klasifikasi Validasi ---") + print(classification_report(all_labels, all_preds, target_names=[str(i) for i in 
range(NUM_CLASSES)], zero_division=0)) + + return epoch_loss, epoch_acc, all_labels, all_preds + +# --- 4. SCRIPT UTAMA --- +if __name__ == '__main__': + train_loader_unbalanced, valid_loader, classes, train_dataset = get_dataloaders(DATA_DIR, BATCH_SIZE, IMAGE_SIZE) + + model = create_model(num_classes=NUM_CLASSES).to(DEVICE) + # Hitung bobot untuk setiap sampel + class_counts = np.bincount(train_dataset.targets) + class_weights = 1. / torch.tensor(class_counts, dtype=torch.float) +sample_weights = class_weights[train_dataset.targets] + class_weights = calculate_class_weights(train_loader.dataset) + print(f"Class weights untuk mengatasi imbalance: {class_weights}") + criterion = nn.CrossEntropyLoss(weight=class_weights) + + # Inisialisasi GradScaler HANYA jika di CUDA + scaler = torch.cuda.amp.GradScaler() if DEVICE == 'cuda' else None + if scaler: + print("Mixed Precision (AMP) diaktifkan.") + + # --- TAHAP 1: FREEZE BACKBONE, LATIH HEAD --- + print("\n--- TAHAP 1: Melatih Classifier Head ---") + for param in model.parameters(): + param.requires_grad = False + for param in model.head.parameters(): + param.requires_grad = True + + optimizer_head = optim.AdamW(model.head.parameters(), lr=LEARNING_RATE_HEAD, weight_decay=WEIGHT_DECAY) + + for epoch in range(5): + print(f"Epoch Head {epoch+1}/5") + # Saat training, oper scaler. Fungsi akan menanganinya. 
+ train_one_epoch(model, train_loader, optimizer_head, criterion, DEVICE, scaler) + validate_one_epoch(model, valid_loader, criterion, DEVICE) + + # --- TAHAP 2: UNFREEZE & FINE-TUNE SELURUH MODEL --- + print("\n--- TAHAP 2: Fine-tuning Seluruh Model ---") + for param in model.parameters(): + param.requires_grad = True + + optimizer_finetune = optim.AdamW(model.parameters(), lr=LEARNING_RATE_FINETUNE, weight_decay=WEIGHT_DECAY) + scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer_finetune, T_max=EPOCHS, eta_min=1e-7) + + history = {'train_loss': [], 'train_acc': [], 'valid_loss': [], 'valid_acc': []} + best_valid_acc = 0.0 + best_labels, best_preds = None, None + + for epoch in range(EPOCHS): + print(f"Epoch {epoch+1}/{EPOCHS}") + + train_loss, train_acc = train_one_epoch(model, train_loader, optimizer_finetune, criterion, DEVICE, scaler) + valid_loss, valid_acc, valid_labels, valid_preds = validate_one_epoch(model, valid_loader, criterion, DEVICE) + + scheduler.step() + + print(f" Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}") + print(f" Valid Loss: {valid_loss:.4f}, Valid Acc: {valid_acc:.4f}") + + history['train_loss'].append(train_loss) + history['train_acc'].append(train_acc) + history['valid_loss'].append(valid_loss) + history['valid_acc'].append(valid_acc) + + if valid_acc > best_valid_acc: + print(f"Validasi akurasi meningkat dari {best_valid_acc:.4f} ke {valid_acc:.4f}. Menyimpan model...") + save_model(epoch, model, optimizer_finetune, criterion, f"{OUTPUT_DIR}/{MODEL_NAME}") + best_valid_acc = valid_acc + best_labels = valid_labels + best_preds = valid_preds + + # --- F. 
EVALUASI & REPORTING --- + save_plots(history['train_acc'], history['valid_acc'], history['train_loss'], history['valid_loss'], OUTPUT_DIR) + + if best_labels and best_preds: + save_confusion_matrix(best_labels, best_preds, classes, f"{OUTPUT_DIR}/confusion_matrix.png") + + print("\n--- Selesai ---") + print(f"Model terbaik disimpan di {OUTPUT_DIR}/{MODEL_NAME}") \ No newline at end of file diff --git a/.history/src/train_20250927100031.py b/.history/src/train_20250927100031.py new file mode 100644 index 0000000000000000000000000000000000000000..64338fd5abe4aba3809b6ba27a03ecb0f0bea6cd --- /dev/null +++ b/.history/src/train_20250927100031.py @@ -0,0 +1,176 @@ +# src/train.py (Versi dengan Automatic Mixed Precision) + +import torch +import torch.nn as nn +import torch.optim as optim +from tqdm import tqdm +import numpy as np +from sklearn.metrics import classification_report + +# Impor dari file lain dalam proyek +from model import create_model +from dataset import get_dataloaders +from utils import save_model, save_plots, save_confusion_matrix +from torch.utils.data import WeightedRandomSampler + + +# --- 1. KONFIGURASI & HYPERPARAMETERS --- +DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu' +DATA_DIR = 'data' +OUTPUT_DIR = 'outputs/models' +IMAGE_SIZE = 224 +BATCH_SIZE = 16 +EPOCHS = 50 +LEARNING_RATE_HEAD = 3e-3 +LEARNING_RATE_FINETUNE = 5e-5 +WEIGHT_DECAY = 0.01 +MODEL_NAME = 'best_vit_model_amp.pth' +NUM_CLASSES = 4 + +# --- 2. FUNGSI UNTUK MENGATASI CLASS IMBALANCE --- +def calculate_class_weights(dataset): + class_counts = np.bincount(dataset.targets) + class_counts = class_counts[class_counts > 0] # Hanya gunakan kelas yang ada + class_weights = 1. / np.sqrt(class_counts) + return torch.tensor(class_weights, dtype=torch.float).to(DEVICE) + +# --- 3. 
FUNGSI TRAINING & VALIDASI --- +def train_one_epoch(model, dataloader, optimizer, criterion, device, scaler): + model.train() + running_loss = 0.0 + correct_predictions = 0 + total_samples = 0 + + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Training"): + images, labels = images.to(device), labels.to(device) + optimizer.zero_grad() + + # Gunakan autocast HANYA jika di CUDA + if device == 'cuda': + with torch.cuda.amp.autocast(): + outputs = model(images) + loss = criterion(outputs, labels) + + scaler.scale(loss).backward() + scaler.step(optimizer) + scaler.update() + else: # Jika di CPU (atau AMD GPU), jalankan mode normal + outputs = model(images) + loss = criterion(outputs, labels) + loss.backward() + optimizer.step() + + running_loss += loss.item() * images.size(0) + _, preds = torch.max(outputs, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + return epoch_loss, epoch_acc + +def validate_one_epoch(model, dataloader, criterion, device): + model.eval() + running_loss = 0.0 + correct_predictions = 0 + total_samples = 0 + all_preds, all_labels = [], [] + + with torch.no_grad(): + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Validating"): + images, labels = images.to(device), labels.to(device) + outputs = model(images) + loss = criterion(outputs, labels) + + running_loss += loss.item() * images.size(0) + _, preds = torch.max(outputs, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + all_preds.extend(preds.cpu().numpy()) + all_labels.extend(labels.cpu().numpy()) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + + print("\n--- Laporan Klasifikasi Validasi ---") + print(classification_report(all_labels, all_preds, target_names=[str(i) for i in 
range(NUM_CLASSES)], zero_division=0)) + + return epoch_loss, epoch_acc, all_labels, all_preds + +# --- 4. SCRIPT UTAMA --- +if __name__ == '__main__': + train_loader_unbalanced, valid_loader, classes, train_dataset = get_dataloaders(DATA_DIR, BATCH_SIZE, IMAGE_SIZE) + + model = create_model(num_classes=NUM_CLASSES).to(DEVICE) + # Hitung bobot untuk setiap sampel + class_counts = np.bincount(train_dataset.targets) + class_weights = 1. / torch.tensor(class_counts, dtype=torch.float) + sample_weights = class_weights[train_dataset.targets] + class_weights = calculate_class_weights(train_loader.dataset) + print(f"Class weights untuk mengatasi imbalance: {class_weights}") + criterion = nn.CrossEntropyLoss(weight=class_weights) + + # Inisialisasi GradScaler HANYA jika di CUDA + scaler = torch.cuda.amp.GradScaler() if DEVICE == 'cuda' else None + if scaler: + print("Mixed Precision (AMP) diaktifkan.") + + # --- TAHAP 1: FREEZE BACKBONE, LATIH HEAD --- + print("\n--- TAHAP 1: Melatih Classifier Head ---") + for param in model.parameters(): + param.requires_grad = False + for param in model.head.parameters(): + param.requires_grad = True + + optimizer_head = optim.AdamW(model.head.parameters(), lr=LEARNING_RATE_HEAD, weight_decay=WEIGHT_DECAY) + + for epoch in range(5): + print(f"Epoch Head {epoch+1}/5") + # Saat training, oper scaler. Fungsi akan menanganinya. 
+ train_one_epoch(model, train_loader, optimizer_head, criterion, DEVICE, scaler) + validate_one_epoch(model, valid_loader, criterion, DEVICE) + + # --- TAHAP 2: UNFREEZE & FINE-TUNE SELURUH MODEL --- + print("\n--- TAHAP 2: Fine-tuning Seluruh Model ---") + for param in model.parameters(): + param.requires_grad = True + + optimizer_finetune = optim.AdamW(model.parameters(), lr=LEARNING_RATE_FINETUNE, weight_decay=WEIGHT_DECAY) + scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer_finetune, T_max=EPOCHS, eta_min=1e-7) + + history = {'train_loss': [], 'train_acc': [], 'valid_loss': [], 'valid_acc': []} + best_valid_acc = 0.0 + best_labels, best_preds = None, None + + for epoch in range(EPOCHS): + print(f"Epoch {epoch+1}/{EPOCHS}") + + train_loss, train_acc = train_one_epoch(model, train_loader, optimizer_finetune, criterion, DEVICE, scaler) + valid_loss, valid_acc, valid_labels, valid_preds = validate_one_epoch(model, valid_loader, criterion, DEVICE) + + scheduler.step() + + print(f" Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}") + print(f" Valid Loss: {valid_loss:.4f}, Valid Acc: {valid_acc:.4f}") + + history['train_loss'].append(train_loss) + history['train_acc'].append(train_acc) + history['valid_loss'].append(valid_loss) + history['valid_acc'].append(valid_acc) + + if valid_acc > best_valid_acc: + print(f"Validasi akurasi meningkat dari {best_valid_acc:.4f} ke {valid_acc:.4f}. Menyimpan model...") + save_model(epoch, model, optimizer_finetune, criterion, f"{OUTPUT_DIR}/{MODEL_NAME}") + best_valid_acc = valid_acc + best_labels = valid_labels + best_preds = valid_preds + + # --- F. 
EVALUASI & REPORTING --- + save_plots(history['train_acc'], history['valid_acc'], history['train_loss'], history['valid_loss'], OUTPUT_DIR) + + if best_labels and best_preds: + save_confusion_matrix(best_labels, best_preds, classes, f"{OUTPUT_DIR}/confusion_matrix.png") + + print("\n--- Selesai ---") + print(f"Model terbaik disimpan di {OUTPUT_DIR}/{MODEL_NAME}") \ No newline at end of file diff --git a/.history/src/train_20250927100036.py b/.history/src/train_20250927100036.py new file mode 100644 index 0000000000000000000000000000000000000000..49080ede04a99b4f1678289b2dba87d778d89e48 --- /dev/null +++ b/.history/src/train_20250927100036.py @@ -0,0 +1,175 @@ +# src/train.py (Versi dengan Automatic Mixed Precision) + +import torch +import torch.nn as nn +import torch.optim as optim +from tqdm import tqdm +import numpy as np +from sklearn.metrics import classification_report + +# Impor dari file lain dalam proyek +from model import create_model +from dataset import get_dataloaders +from utils import save_model, save_plots, save_confusion_matrix +from torch.utils.data import WeightedRandomSampler + + +# --- 1. KONFIGURASI & HYPERPARAMETERS --- +DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu' +DATA_DIR = 'data' +OUTPUT_DIR = 'outputs/models' +IMAGE_SIZE = 224 +BATCH_SIZE = 16 +EPOCHS = 50 +LEARNING_RATE_HEAD = 3e-3 +LEARNING_RATE_FINETUNE = 5e-5 +WEIGHT_DECAY = 0.01 +MODEL_NAME = 'best_vit_model_amp.pth' +NUM_CLASSES = 4 + +# --- 2. FUNGSI UNTUK MENGATASI CLASS IMBALANCE --- +def calculate_class_weights(dataset): + class_counts = np.bincount(dataset.targets) + class_counts = class_counts[class_counts > 0] # Hanya gunakan kelas yang ada + class_weights = 1. / np.sqrt(class_counts) + return torch.tensor(class_weights, dtype=torch.float).to(DEVICE) + +# --- 3. 
FUNGSI TRAINING & VALIDASI --- +def train_one_epoch(model, dataloader, optimizer, criterion, device, scaler): + model.train() + running_loss = 0.0 + correct_predictions = 0 + total_samples = 0 + + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Training"): + images, labels = images.to(device), labels.to(device) + optimizer.zero_grad() + + # Gunakan autocast HANYA jika di CUDA + if device == 'cuda': + with torch.cuda.amp.autocast(): + outputs = model(images) + loss = criterion(outputs, labels) + + scaler.scale(loss).backward() + scaler.step(optimizer) + scaler.update() + else: # Jika di CPU (atau AMD GPU), jalankan mode normal + outputs = model(images) + loss = criterion(outputs, labels) + loss.backward() + optimizer.step() + + running_loss += loss.item() * images.size(0) + _, preds = torch.max(outputs, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + return epoch_loss, epoch_acc + +def validate_one_epoch(model, dataloader, criterion, device): + model.eval() + running_loss = 0.0 + correct_predictions = 0 + total_samples = 0 + all_preds, all_labels = [], [] + + with torch.no_grad(): + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Validating"): + images, labels = images.to(device), labels.to(device) + outputs = model(images) + loss = criterion(outputs, labels) + + running_loss += loss.item() * images.size(0) + _, preds = torch.max(outputs, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + all_preds.extend(preds.cpu().numpy()) + all_labels.extend(labels.cpu().numpy()) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + + print("\n--- Laporan Klasifikasi Validasi ---") + print(classification_report(all_labels, all_preds, target_names=[str(i) for i in 
range(NUM_CLASSES)], zero_division=0)) + + return epoch_loss, epoch_acc, all_labels, all_preds + +# --- 4. SCRIPT UTAMA --- +if __name__ == '__main__': + train_loader_unbalanced, valid_loader, classes, train_dataset = get_dataloaders(DATA_DIR, BATCH_SIZE, IMAGE_SIZE) + + model = create_model(num_classes=NUM_CLASSES).to(DEVICE) + # Hitung bobot untuk setiap sampel + class_counts = np.bincount(train_dataset.targets) + class_weights = 1. / torch.tensor(class_counts, dtype=torch.float) + sample_weights = class_weights[train_dataset.targets] + print(f"Class weights untuk mengatasi imbalance: {class_weights}") + criterion = nn.CrossEntropyLoss(weight=class_weights) + + # Inisialisasi GradScaler HANYA jika di CUDA + scaler = torch.cuda.amp.GradScaler() if DEVICE == 'cuda' else None + if scaler: + print("Mixed Precision (AMP) diaktifkan.") + + # --- TAHAP 1: FREEZE BACKBONE, LATIH HEAD --- + print("\n--- TAHAP 1: Melatih Classifier Head ---") + for param in model.parameters(): + param.requires_grad = False + for param in model.head.parameters(): + param.requires_grad = True + + optimizer_head = optim.AdamW(model.head.parameters(), lr=LEARNING_RATE_HEAD, weight_decay=WEIGHT_DECAY) + + for epoch in range(5): + print(f"Epoch Head {epoch+1}/5") + # Saat training, oper scaler. Fungsi akan menanganinya. 
+ train_one_epoch(model, train_loader, optimizer_head, criterion, DEVICE, scaler) + validate_one_epoch(model, valid_loader, criterion, DEVICE) + + # --- TAHAP 2: UNFREEZE & FINE-TUNE SELURUH MODEL --- + print("\n--- TAHAP 2: Fine-tuning Seluruh Model ---") + for param in model.parameters(): + param.requires_grad = True + + optimizer_finetune = optim.AdamW(model.parameters(), lr=LEARNING_RATE_FINETUNE, weight_decay=WEIGHT_DECAY) + scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer_finetune, T_max=EPOCHS, eta_min=1e-7) + + history = {'train_loss': [], 'train_acc': [], 'valid_loss': [], 'valid_acc': []} + best_valid_acc = 0.0 + best_labels, best_preds = None, None + + for epoch in range(EPOCHS): + print(f"Epoch {epoch+1}/{EPOCHS}") + + train_loss, train_acc = train_one_epoch(model, train_loader, optimizer_finetune, criterion, DEVICE, scaler) + valid_loss, valid_acc, valid_labels, valid_preds = validate_one_epoch(model, valid_loader, criterion, DEVICE) + + scheduler.step() + + print(f" Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}") + print(f" Valid Loss: {valid_loss:.4f}, Valid Acc: {valid_acc:.4f}") + + history['train_loss'].append(train_loss) + history['train_acc'].append(train_acc) + history['valid_loss'].append(valid_loss) + history['valid_acc'].append(valid_acc) + + if valid_acc > best_valid_acc: + print(f"Validasi akurasi meningkat dari {best_valid_acc:.4f} ke {valid_acc:.4f}. Menyimpan model...") + save_model(epoch, model, optimizer_finetune, criterion, f"{OUTPUT_DIR}/{MODEL_NAME}") + best_valid_acc = valid_acc + best_labels = valid_labels + best_preds = valid_preds + + # --- F. 
EVALUASI & REPORTING --- + save_plots(history['train_acc'], history['valid_acc'], history['train_loss'], history['valid_loss'], OUTPUT_DIR) + + if best_labels and best_preds: + save_confusion_matrix(best_labels, best_preds, classes, f"{OUTPUT_DIR}/confusion_matrix.png") + + print("\n--- Selesai ---") + print(f"Model terbaik disimpan di {OUTPUT_DIR}/{MODEL_NAME}") \ No newline at end of file diff --git a/.history/src/train_20250927100123.py b/.history/src/train_20250927100123.py new file mode 100644 index 0000000000000000000000000000000000000000..b369d022cb168653c07e7c346e9d1fdb6e0bd819 --- /dev/null +++ b/.history/src/train_20250927100123.py @@ -0,0 +1,171 @@ +# src/train.py (Versi Final dengan Balanced Sampler & Focal Loss) + +import torch +import torch.nn as nn +import torch.optim as optim +from tqdm import tqdm +import numpy as np +from sklearn.metrics import classification_report +from torch.utils.data import DataLoader, WeightedRandomSampler + +# Impor dari file lain dalam proyek +from model import create_model +from dataset import get_dataloaders +from utils import save_model, save_plots, save_confusion_matrix, FocalLoss # Pastikan FocalLoss diimpor + +# --- 1. KONFIGURASI & HYPERPARAMETERS --- +DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu' +DATA_DIR = '../data' +OUTPUT_DIR = '../outputs' +IMAGE_SIZE = 224 +BATCH_SIZE = 16 +EPOCHS = 100 +LEARNING_RATE_HEAD = 1e-3 +LEARNING_RATE_FINETUNE = 3e-5 +WEIGHT_DECAY = 0.01 +MODEL_NAME = 'best_vit_model_balanced.pth' +NUM_CLASSES = 4 + +# --- 2. 
FUNGSI TRAINING & VALIDASI (Tidak ada perubahan di sini) --- +def train_one_epoch(model, dataloader, optimizer, criterion, device): + model.train() + running_loss = 0.0 + correct_predictions = 0 + total_samples = 0 + + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Training"): + images, labels = images.to(device), labels.to(device) + optimizer.zero_grad() + outputs = model(images) + loss = criterion(outputs, labels) + loss.backward() + optimizer.step() + + running_loss += loss.item() * images.size(0) + _, preds = torch.max(outputs, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + return epoch_loss, epoch_acc + +def validate_one_epoch(model, dataloader, criterion, device): + model.eval() + running_loss = 0.0 + correct_predictions = 0 + total_samples = 0 + all_preds, all_labels = [], [] + + with torch.no_grad(): + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Validating"): + images, labels = images.to(device), labels.to(device) + outputs = model(images) + loss = criterion(outputs, labels) + + running_loss += loss.item() * images.size(0) + _, preds = torch.max(outputs, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + all_preds.extend(preds.cpu().numpy()) + all_labels.extend(labels.cpu().numpy()) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + + print("\n--- Laporan Klasifikasi Validasi ---") + print(classification_report(all_labels, all_preds, target_names=[str(i) for i in range(NUM_CLASSES)], zero_division=0)) + + return epoch_loss, epoch_acc, all_labels, all_preds + +# --- 3. SCRIPT UTAMA --- +if __name__ == '__main__': + # A. 
Persiapan Data & Implementasi WeightedRandomSampler + # Panggil get_dataloaders untuk mendapatkan dataset latih + _, valid_loader, classes, train_dataset = get_dataloaders(DATA_DIR, BATCH_SIZE, IMAGE_SIZE) + + print("\n--- Menyiapkan Balanced Sampler untuk Training ---") + # Hitung bobot untuk setiap sampel (bukan per kelas) + class_counts = np.bincount(train_dataset.targets) + class_weights = 1. / torch.tensor(class_counts, dtype=torch.float) + sample_weights = class_weights[train_dataset.targets] + + # Buat Sampler yang akan menyeimbangkan pengambilan data + sampler = WeightedRandomSampler(weights=sample_weights, num_samples=len(sample_weights), replacement=True) + + # Buat DataLoader training BARU yang menggunakan sampler + train_loader = DataLoader( + train_dataset, + batch_size=BATCH_SIZE, + sampler=sampler, + num_workers=4 # Sesuaikan dengan kemampuan CPU Anda + ) + print("Balanced Sampler berhasil dibuat. Setiap batch training sekarang akan seimbang.") + + # B. Arsitektur & Pretraining + model = create_model(num_classes=NUM_CLASSES).to(DEVICE) + + # C. Menggunakan Focal Loss + # Ganti dari CrossEntropyLoss menjadi FocalLoss + criterion = FocalLoss().to(DEVICE) + print("Menggunakan Focal Loss untuk fokus pada sampel yang sulit.") + + # D. 
Strategi Fine-tune + # --- TAHAP 1: FREEZE BACKBONE, LATIH HEAD --- + print("\n--- TAHAP 1: Melatih Classifier Head ---") + for param in model.parameters(): + param.requires_grad = False + for param in model.head.parameters(): + param.requires_grad = True + + optimizer_head = optim.AdamW(model.head.parameters(), lr=LEARNING_RATE_HEAD, weight_decay=WEIGHT_DECAY) + + for epoch in range(5): + print(f"Epoch Head {epoch+1}/5") + train_one_epoch(model, train_loader, optimizer_head, criterion, DEVICE) + validate_one_epoch(model, valid_loader, criterion, DEVICE) + + # --- TAHAP 2: UNFREEZE & FINE-TUNE SELURUH MODEL --- + print("\n--- TAHAP 2: Fine-tuning Seluruh Model ---") + for param in model.parameters(): + param.requires_grad = True + + optimizer_finetune = optim.AdamW(model.parameters(), lr=LEARNING_RATE_FINETUNE, weight_decay=WEIGHT_DECAY) + scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer_finetune, T_max=EPOCHS, eta_min=1e-7) + + history = {'train_loss': [], 'train_acc': [], 'valid_loss': [], 'valid_acc': []} + best_valid_acc = 0.0 + best_labels, best_preds = None, None + + for epoch in range(EPOCHS): + print(f"Epoch {epoch+1}/{EPOCHS}") + + train_loss, train_acc = train_one_epoch(model, train_loader, optimizer_finetune, criterion, DEVICE) + valid_loss, valid_acc, valid_labels, valid_preds = validate_one_epoch(model, valid_loader, criterion, DEVICE) + + scheduler.step() + + print(f" Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}") + print(f" Valid Loss: {valid_loss:.4f}, Valid Acc: {valid_acc:.4f}") + + history['train_loss'].append(train_loss) + history['train_acc'].append(train_acc) + history['valid_loss'].append(valid_loss) + history['valid_acc'].append(valid_acc) + + if valid_acc > best_valid_acc: + print(f"Validasi akurasi meningkat dari {best_valid_acc:.4f} ke {valid_acc:.4f}. 
Menyimpan model...") + save_model(epoch, model, optimizer_finetune, criterion, f"{OUTPUT_DIR}/{MODEL_NAME}") + best_valid_acc = valid_acc + best_labels = valid_labels + best_preds = valid_preds + + # E. Evaluasi & Reporting + save_plots(history['train_acc'], history['valid_acc'], history['train_loss'], history['valid_loss'], OUTPUT_DIR) + + if best_labels and best_preds: + save_confusion_matrix(best_labels, best_preds, classes, f"{OUTPUT_DIR}/confusion_matrix.png") + + print("\n--- Selesai ---") + print(f"Model terbaik disimpan di {OUTPUT_DIR}/{MODEL_NAME}") \ No newline at end of file diff --git a/.history/src/train_20250927100125.py b/.history/src/train_20250927100125.py new file mode 100644 index 0000000000000000000000000000000000000000..49080ede04a99b4f1678289b2dba87d778d89e48 --- /dev/null +++ b/.history/src/train_20250927100125.py @@ -0,0 +1,175 @@ +# src/train.py (Versi dengan Automatic Mixed Precision) + +import torch +import torch.nn as nn +import torch.optim as optim +from tqdm import tqdm +import numpy as np +from sklearn.metrics import classification_report + +# Impor dari file lain dalam proyek +from model import create_model +from dataset import get_dataloaders +from utils import save_model, save_plots, save_confusion_matrix +from torch.utils.data import WeightedRandomSampler + + +# --- 1. KONFIGURASI & HYPERPARAMETERS --- +DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu' +DATA_DIR = 'data' +OUTPUT_DIR = 'outputs/models' +IMAGE_SIZE = 224 +BATCH_SIZE = 16 +EPOCHS = 50 +LEARNING_RATE_HEAD = 3e-3 +LEARNING_RATE_FINETUNE = 5e-5 +WEIGHT_DECAY = 0.01 +MODEL_NAME = 'best_vit_model_amp.pth' +NUM_CLASSES = 4 + +# --- 2. FUNGSI UNTUK MENGATASI CLASS IMBALANCE --- +def calculate_class_weights(dataset): + class_counts = np.bincount(dataset.targets) + class_counts = class_counts[class_counts > 0] # Hanya gunakan kelas yang ada + class_weights = 1. / np.sqrt(class_counts) + return torch.tensor(class_weights, dtype=torch.float).to(DEVICE) + +# --- 3. 
FUNGSI TRAINING & VALIDASI --- +def train_one_epoch(model, dataloader, optimizer, criterion, device, scaler): + model.train() + running_loss = 0.0 + correct_predictions = 0 + total_samples = 0 + + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Training"): + images, labels = images.to(device), labels.to(device) + optimizer.zero_grad() + + # Gunakan autocast HANYA jika di CUDA + if device == 'cuda': + with torch.cuda.amp.autocast(): + outputs = model(images) + loss = criterion(outputs, labels) + + scaler.scale(loss).backward() + scaler.step(optimizer) + scaler.update() + else: # Jika di CPU (atau AMD GPU), jalankan mode normal + outputs = model(images) + loss = criterion(outputs, labels) + loss.backward() + optimizer.step() + + running_loss += loss.item() * images.size(0) + _, preds = torch.max(outputs, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + return epoch_loss, epoch_acc + +def validate_one_epoch(model, dataloader, criterion, device): + model.eval() + running_loss = 0.0 + correct_predictions = 0 + total_samples = 0 + all_preds, all_labels = [], [] + + with torch.no_grad(): + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Validating"): + images, labels = images.to(device), labels.to(device) + outputs = model(images) + loss = criterion(outputs, labels) + + running_loss += loss.item() * images.size(0) + _, preds = torch.max(outputs, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + all_preds.extend(preds.cpu().numpy()) + all_labels.extend(labels.cpu().numpy()) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + + print("\n--- Laporan Klasifikasi Validasi ---") + print(classification_report(all_labels, all_preds, target_names=[str(i) for i in 
range(NUM_CLASSES)], zero_division=0)) + + return epoch_loss, epoch_acc, all_labels, all_preds + +# --- 4. SCRIPT UTAMA --- +if __name__ == '__main__': + train_loader_unbalanced, valid_loader, classes, train_dataset = get_dataloaders(DATA_DIR, BATCH_SIZE, IMAGE_SIZE) + + model = create_model(num_classes=NUM_CLASSES).to(DEVICE) + # Hitung bobot untuk setiap sampel + class_counts = np.bincount(train_dataset.targets) + class_weights = 1. / torch.tensor(class_counts, dtype=torch.float) + sample_weights = class_weights[train_dataset.targets] + print(f"Class weights untuk mengatasi imbalance: {class_weights}") + criterion = nn.CrossEntropyLoss(weight=class_weights) + + # Inisialisasi GradScaler HANYA jika di CUDA + scaler = torch.cuda.amp.GradScaler() if DEVICE == 'cuda' else None + if scaler: + print("Mixed Precision (AMP) diaktifkan.") + + # --- TAHAP 1: FREEZE BACKBONE, LATIH HEAD --- + print("\n--- TAHAP 1: Melatih Classifier Head ---") + for param in model.parameters(): + param.requires_grad = False + for param in model.head.parameters(): + param.requires_grad = True + + optimizer_head = optim.AdamW(model.head.parameters(), lr=LEARNING_RATE_HEAD, weight_decay=WEIGHT_DECAY) + + for epoch in range(5): + print(f"Epoch Head {epoch+1}/5") + # Saat training, oper scaler. Fungsi akan menanganinya. 
+ train_one_epoch(model, train_loader, optimizer_head, criterion, DEVICE, scaler) + validate_one_epoch(model, valid_loader, criterion, DEVICE) + + # --- TAHAP 2: UNFREEZE & FINE-TUNE SELURUH MODEL --- + print("\n--- TAHAP 2: Fine-tuning Seluruh Model ---") + for param in model.parameters(): + param.requires_grad = True + + optimizer_finetune = optim.AdamW(model.parameters(), lr=LEARNING_RATE_FINETUNE, weight_decay=WEIGHT_DECAY) + scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer_finetune, T_max=EPOCHS, eta_min=1e-7) + + history = {'train_loss': [], 'train_acc': [], 'valid_loss': [], 'valid_acc': []} + best_valid_acc = 0.0 + best_labels, best_preds = None, None + + for epoch in range(EPOCHS): + print(f"Epoch {epoch+1}/{EPOCHS}") + + train_loss, train_acc = train_one_epoch(model, train_loader, optimizer_finetune, criterion, DEVICE, scaler) + valid_loss, valid_acc, valid_labels, valid_preds = validate_one_epoch(model, valid_loader, criterion, DEVICE) + + scheduler.step() + + print(f" Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}") + print(f" Valid Loss: {valid_loss:.4f}, Valid Acc: {valid_acc:.4f}") + + history['train_loss'].append(train_loss) + history['train_acc'].append(train_acc) + history['valid_loss'].append(valid_loss) + history['valid_acc'].append(valid_acc) + + if valid_acc > best_valid_acc: + print(f"Validasi akurasi meningkat dari {best_valid_acc:.4f} ke {valid_acc:.4f}. Menyimpan model...") + save_model(epoch, model, optimizer_finetune, criterion, f"{OUTPUT_DIR}/{MODEL_NAME}") + best_valid_acc = valid_acc + best_labels = valid_labels + best_preds = valid_preds + + # --- F. 
EVALUASI & REPORTING --- + save_plots(history['train_acc'], history['valid_acc'], history['train_loss'], history['valid_loss'], OUTPUT_DIR) + + if best_labels and best_preds: + save_confusion_matrix(best_labels, best_preds, classes, f"{OUTPUT_DIR}/confusion_matrix.png") + + print("\n--- Selesai ---") + print(f"Model terbaik disimpan di {OUTPUT_DIR}/{MODEL_NAME}") \ No newline at end of file diff --git a/.history/src/train_20250927100127.py b/.history/src/train_20250927100127.py new file mode 100644 index 0000000000000000000000000000000000000000..b369d022cb168653c07e7c346e9d1fdb6e0bd819 --- /dev/null +++ b/.history/src/train_20250927100127.py @@ -0,0 +1,171 @@ +# src/train.py (Versi Final dengan Balanced Sampler & Focal Loss) + +import torch +import torch.nn as nn +import torch.optim as optim +from tqdm import tqdm +import numpy as np +from sklearn.metrics import classification_report +from torch.utils.data import DataLoader, WeightedRandomSampler + +# Impor dari file lain dalam proyek +from model import create_model +from dataset import get_dataloaders +from utils import save_model, save_plots, save_confusion_matrix, FocalLoss # Pastikan FocalLoss diimpor + +# --- 1. KONFIGURASI & HYPERPARAMETERS --- +DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu' +DATA_DIR = '../data' +OUTPUT_DIR = '../outputs' +IMAGE_SIZE = 224 +BATCH_SIZE = 16 +EPOCHS = 100 +LEARNING_RATE_HEAD = 1e-3 +LEARNING_RATE_FINETUNE = 3e-5 +WEIGHT_DECAY = 0.01 +MODEL_NAME = 'best_vit_model_balanced.pth' +NUM_CLASSES = 4 + +# --- 2. 
FUNGSI TRAINING & VALIDASI (Tidak ada perubahan di sini) --- +def train_one_epoch(model, dataloader, optimizer, criterion, device): + model.train() + running_loss = 0.0 + correct_predictions = 0 + total_samples = 0 + + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Training"): + images, labels = images.to(device), labels.to(device) + optimizer.zero_grad() + outputs = model(images) + loss = criterion(outputs, labels) + loss.backward() + optimizer.step() + + running_loss += loss.item() * images.size(0) + _, preds = torch.max(outputs, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + return epoch_loss, epoch_acc + +def validate_one_epoch(model, dataloader, criterion, device): + model.eval() + running_loss = 0.0 + correct_predictions = 0 + total_samples = 0 + all_preds, all_labels = [], [] + + with torch.no_grad(): + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Validating"): + images, labels = images.to(device), labels.to(device) + outputs = model(images) + loss = criterion(outputs, labels) + + running_loss += loss.item() * images.size(0) + _, preds = torch.max(outputs, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + all_preds.extend(preds.cpu().numpy()) + all_labels.extend(labels.cpu().numpy()) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + + print("\n--- Laporan Klasifikasi Validasi ---") + print(classification_report(all_labels, all_preds, target_names=[str(i) for i in range(NUM_CLASSES)], zero_division=0)) + + return epoch_loss, epoch_acc, all_labels, all_preds + +# --- 3. SCRIPT UTAMA --- +if __name__ == '__main__': + # A. 
Persiapan Data & Implementasi WeightedRandomSampler + # Panggil get_dataloaders untuk mendapatkan dataset latih + _, valid_loader, classes, train_dataset = get_dataloaders(DATA_DIR, BATCH_SIZE, IMAGE_SIZE) + + print("\n--- Menyiapkan Balanced Sampler untuk Training ---") + # Hitung bobot untuk setiap sampel (bukan per kelas) + class_counts = np.bincount(train_dataset.targets) + class_weights = 1. / torch.tensor(class_counts, dtype=torch.float) + sample_weights = class_weights[train_dataset.targets] + + # Buat Sampler yang akan menyeimbangkan pengambilan data + sampler = WeightedRandomSampler(weights=sample_weights, num_samples=len(sample_weights), replacement=True) + + # Buat DataLoader training BARU yang menggunakan sampler + train_loader = DataLoader( + train_dataset, + batch_size=BATCH_SIZE, + sampler=sampler, + num_workers=4 # Sesuaikan dengan kemampuan CPU Anda + ) + print("Balanced Sampler berhasil dibuat. Setiap batch training sekarang akan seimbang.") + + # B. Arsitektur & Pretraining + model = create_model(num_classes=NUM_CLASSES).to(DEVICE) + + # C. Menggunakan Focal Loss + # Ganti dari CrossEntropyLoss menjadi FocalLoss + criterion = FocalLoss().to(DEVICE) + print("Menggunakan Focal Loss untuk fokus pada sampel yang sulit.") + + # D. 
Strategi Fine-tune + # --- TAHAP 1: FREEZE BACKBONE, LATIH HEAD --- + print("\n--- TAHAP 1: Melatih Classifier Head ---") + for param in model.parameters(): + param.requires_grad = False + for param in model.head.parameters(): + param.requires_grad = True + + optimizer_head = optim.AdamW(model.head.parameters(), lr=LEARNING_RATE_HEAD, weight_decay=WEIGHT_DECAY) + + for epoch in range(5): + print(f"Epoch Head {epoch+1}/5") + train_one_epoch(model, train_loader, optimizer_head, criterion, DEVICE) + validate_one_epoch(model, valid_loader, criterion, DEVICE) + + # --- TAHAP 2: UNFREEZE & FINE-TUNE SELURUH MODEL --- + print("\n--- TAHAP 2: Fine-tuning Seluruh Model ---") + for param in model.parameters(): + param.requires_grad = True + + optimizer_finetune = optim.AdamW(model.parameters(), lr=LEARNING_RATE_FINETUNE, weight_decay=WEIGHT_DECAY) + scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer_finetune, T_max=EPOCHS, eta_min=1e-7) + + history = {'train_loss': [], 'train_acc': [], 'valid_loss': [], 'valid_acc': []} + best_valid_acc = 0.0 + best_labels, best_preds = None, None + + for epoch in range(EPOCHS): + print(f"Epoch {epoch+1}/{EPOCHS}") + + train_loss, train_acc = train_one_epoch(model, train_loader, optimizer_finetune, criterion, DEVICE) + valid_loss, valid_acc, valid_labels, valid_preds = validate_one_epoch(model, valid_loader, criterion, DEVICE) + + scheduler.step() + + print(f" Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}") + print(f" Valid Loss: {valid_loss:.4f}, Valid Acc: {valid_acc:.4f}") + + history['train_loss'].append(train_loss) + history['train_acc'].append(train_acc) + history['valid_loss'].append(valid_loss) + history['valid_acc'].append(valid_acc) + + if valid_acc > best_valid_acc: + print(f"Validasi akurasi meningkat dari {best_valid_acc:.4f} ke {valid_acc:.4f}. 
Menyimpan model...") + save_model(epoch, model, optimizer_finetune, criterion, f"{OUTPUT_DIR}/{MODEL_NAME}") + best_valid_acc = valid_acc + best_labels = valid_labels + best_preds = valid_preds + + # E. Evaluasi & Reporting + save_plots(history['train_acc'], history['valid_acc'], history['train_loss'], history['valid_loss'], OUTPUT_DIR) + + if best_labels and best_preds: + save_confusion_matrix(best_labels, best_preds, classes, f"{OUTPUT_DIR}/confusion_matrix.png") + + print("\n--- Selesai ---") + print(f"Model terbaik disimpan di {OUTPUT_DIR}/{MODEL_NAME}") \ No newline at end of file diff --git a/.history/src/train_20250927100229.py b/.history/src/train_20250927100229.py new file mode 100644 index 0000000000000000000000000000000000000000..792a9bc36092dc3a98fddb3fb5c946ecae42f4ae --- /dev/null +++ b/.history/src/train_20250927100229.py @@ -0,0 +1,172 @@ +# src/train.py (Versi Final dengan Balanced Sampler & Focal Loss) + +import torch +import torch.nn as nn +import torch.optim as optim +from tqdm import tqdm +import numpy as np +from sklearn.metrics import classification_report +from torch.utils.data import DataLoader, WeightedRandomSampler +from utils import save_model, save_plots, save_confusion_matrix, FocalLoss + +# Impor dari file lain dalam proyek +from model import create_model +from dataset import get_dataloaders +from utils import save_model, save_plots, save_confusion_matrix, FocalLoss # Pastikan FocalLoss diimpor + +# --- 1. KONFIGURASI & HYPERPARAMETERS --- +DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu' +DATA_DIR = '../data' +OUTPUT_DIR = '../outputs' +IMAGE_SIZE = 224 +BATCH_SIZE = 16 +EPOCHS = 100 +LEARNING_RATE_HEAD = 1e-3 +LEARNING_RATE_FINETUNE = 3e-5 +WEIGHT_DECAY = 0.01 +MODEL_NAME = 'best_vit_model_balanced.pth' +NUM_CLASSES = 4 + +# --- 2. 
FUNGSI TRAINING & VALIDASI (Tidak ada perubahan di sini) --- +def train_one_epoch(model, dataloader, optimizer, criterion, device): + model.train() + running_loss = 0.0 + correct_predictions = 0 + total_samples = 0 + + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Training"): + images, labels = images.to(device), labels.to(device) + optimizer.zero_grad() + outputs = model(images) + loss = criterion(outputs, labels) + loss.backward() + optimizer.step() + + running_loss += loss.item() * images.size(0) + _, preds = torch.max(outputs, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + return epoch_loss, epoch_acc + +def validate_one_epoch(model, dataloader, criterion, device): + model.eval() + running_loss = 0.0 + correct_predictions = 0 + total_samples = 0 + all_preds, all_labels = [], [] + + with torch.no_grad(): + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Validating"): + images, labels = images.to(device), labels.to(device) + outputs = model(images) + loss = criterion(outputs, labels) + + running_loss += loss.item() * images.size(0) + _, preds = torch.max(outputs, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + all_preds.extend(preds.cpu().numpy()) + all_labels.extend(labels.cpu().numpy()) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + + print("\n--- Laporan Klasifikasi Validasi ---") + print(classification_report(all_labels, all_preds, target_names=[str(i) for i in range(NUM_CLASSES)], zero_division=0)) + + return epoch_loss, epoch_acc, all_labels, all_preds + +# --- 3. SCRIPT UTAMA --- +if __name__ == '__main__': + # A. 
Persiapan Data & Implementasi WeightedRandomSampler + # Panggil get_dataloaders untuk mendapatkan dataset latih + _, valid_loader, classes, train_dataset = get_dataloaders(DATA_DIR, BATCH_SIZE, IMAGE_SIZE) + + print("\n--- Menyiapkan Balanced Sampler untuk Training ---") + # Hitung bobot untuk setiap sampel (bukan per kelas) + class_counts = np.bincount(train_dataset.targets) + class_weights = 1. / torch.tensor(class_counts, dtype=torch.float) + sample_weights = class_weights[train_dataset.targets] + + # Buat Sampler yang akan menyeimbangkan pengambilan data + sampler = WeightedRandomSampler(weights=sample_weights, num_samples=len(sample_weights), replacement=True) + + # Buat DataLoader training BARU yang menggunakan sampler + train_loader = DataLoader( + train_dataset, + batch_size=BATCH_SIZE, + sampler=sampler, + num_workers=4 # Sesuaikan dengan kemampuan CPU Anda + ) + print("Balanced Sampler berhasil dibuat. Setiap batch training sekarang akan seimbang.") + + # B. Arsitektur & Pretraining + model = create_model(num_classes=NUM_CLASSES).to(DEVICE) + + # C. Menggunakan Focal Loss + # Ganti dari CrossEntropyLoss menjadi FocalLoss + criterion = FocalLoss().to(DEVICE) + print("Menggunakan Focal Loss untuk fokus pada sampel yang sulit.") + + # D. 
Strategi Fine-tune + # --- TAHAP 1: FREEZE BACKBONE, LATIH HEAD --- + print("\n--- TAHAP 1: Melatih Classifier Head ---") + for param in model.parameters(): + param.requires_grad = False + for param in model.head.parameters(): + param.requires_grad = True + + optimizer_head = optim.AdamW(model.head.parameters(), lr=LEARNING_RATE_HEAD, weight_decay=WEIGHT_DECAY) + + for epoch in range(5): + print(f"Epoch Head {epoch+1}/5") + train_one_epoch(model, train_loader, optimizer_head, criterion, DEVICE) + validate_one_epoch(model, valid_loader, criterion, DEVICE) + + # --- TAHAP 2: UNFREEZE & FINE-TUNE SELURUH MODEL --- + print("\n--- TAHAP 2: Fine-tuning Seluruh Model ---") + for param in model.parameters(): + param.requires_grad = True + + optimizer_finetune = optim.AdamW(model.parameters(), lr=LEARNING_RATE_FINETUNE, weight_decay=WEIGHT_DECAY) + scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer_finetune, T_max=EPOCHS, eta_min=1e-7) + + history = {'train_loss': [], 'train_acc': [], 'valid_loss': [], 'valid_acc': []} + best_valid_acc = 0.0 + best_labels, best_preds = None, None + + for epoch in range(EPOCHS): + print(f"Epoch {epoch+1}/{EPOCHS}") + + train_loss, train_acc = train_one_epoch(model, train_loader, optimizer_finetune, criterion, DEVICE) + valid_loss, valid_acc, valid_labels, valid_preds = validate_one_epoch(model, valid_loader, criterion, DEVICE) + + scheduler.step() + + print(f" Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}") + print(f" Valid Loss: {valid_loss:.4f}, Valid Acc: {valid_acc:.4f}") + + history['train_loss'].append(train_loss) + history['train_acc'].append(train_acc) + history['valid_loss'].append(valid_loss) + history['valid_acc'].append(valid_acc) + + if valid_acc > best_valid_acc: + print(f"Validasi akurasi meningkat dari {best_valid_acc:.4f} ke {valid_acc:.4f}. 
Menyimpan model...") + save_model(epoch, model, optimizer_finetune, criterion, f"{OUTPUT_DIR}/{MODEL_NAME}") + best_valid_acc = valid_acc + best_labels = valid_labels + best_preds = valid_preds + + # E. Evaluasi & Reporting + save_plots(history['train_acc'], history['valid_acc'], history['train_loss'], history['valid_loss'], OUTPUT_DIR) + + if best_labels and best_preds: + save_confusion_matrix(best_labels, best_preds, classes, f"{OUTPUT_DIR}/confusion_matrix.png") + + print("\n--- Selesai ---") + print(f"Model terbaik disimpan di {OUTPUT_DIR}/{MODEL_NAME}") \ No newline at end of file diff --git a/.history/src/train_20250927100247.py b/.history/src/train_20250927100247.py new file mode 100644 index 0000000000000000000000000000000000000000..92080874af5968c29010d4e24ca6712e38dd0b3b --- /dev/null +++ b/.history/src/train_20250927100247.py @@ -0,0 +1,172 @@ +# src/train.py (Versi Final dengan Balanced Sampler & Focal Loss) + +import torch +import torch.nn as nn +import torch.optim as optim +from tqdm import tqdm +import numpy as np +from sklearn.metrics import classification_report +from torch.utils.data import DataLoader, WeightedRandomSampler +from utils import save_model, save_plots, save_confusion_matrix, FocalLoss + +# Impor dari file lain dalam proyek +from model import create_model +from dataset import get_dataloaders +from utils import save_model, save_plots, save_confusion_matrix, FocalLoss # Pastikan FocalLoss diimpor + +# --- 1. KONFIGURASI & HYPERPARAMETERS --- +DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu' +DATA_DIR = '../data' +OUTPUT_DIR = '../outputs' +IMAGE_SIZE = 224 +BATCH_SIZE = 16 +EPOCHS = 100 +LEARNING_RATE_HEAD = 1e-3 +LEARNING_RATE_FINETUNE = 3e-5 +WEIGHT_DECAY = 0.01 +MODEL_NAME = 'best_vit_model_balanced.pth' +NUM_CLASSES = 4 + +# --- 2. 
FUNGSI TRAINING & VALIDASI (Tidak ada perubahan di sini) --- +def train_one_epoch(model, dataloader, optimizer, criterion, device): + model.train() + running_loss = 0.0 + correct_predictions = 0 + total_samples = 0 + + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Training"): + images, labels = images.to(device), labels.to(device) + optimizer.zero_grad() + outputs = model(images) + loss = criterion(outputs, labels) + loss.backward() + optimizer.step() + + running_loss += loss.item() * images.size(0) + _, preds = torch.max(outputs, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + return epoch_loss, epoch_acc + +def validate_one_epoch(model, dataloader, criterion, device): + model.eval() + running_loss = 0.0 + correct_predictions = 0 + total_samples = 0 + all_preds, all_labels = [], [] + + with torch.no_grad(): + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Validating"): + images, labels = images.to(device), labels.to(device) + outputs = model(images) + loss = criterion(outputs, labels) + + running_loss += loss.item() * images.size(0) + _, preds = torch.max(outputs, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + all_preds.extend(preds.cpu().numpy()) + all_labels.extend(labels.cpu().numpy()) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + + print("\n--- Laporan Klasifikasi Validasi ---") + print(classification_report(all_labels, all_preds, target_names=[str(i) for i in range(NUM_CLASSES)], zero_division=0)) + + return epoch_loss, epoch_acc, all_labels, all_preds + +# --- 3. SCRIPT UTAMA --- +if __name__ == '__main__': + # A. 
Persiapan Data & Implementasi WeightedRandomSampler + # Panggil get_dataloaders untuk mendapatkan dataset latih + _, valid_loader, classes, train_dataset = get_dataloaders(DATA_DIR, BATCH_SIZE, IMAGE_SIZE) + + print("\n--- Menyiapkan Balanced Sampler untuk Training ---") + # Hitung bobot untuk setiap sampel (bukan per kelas) + class_counts = np.bincount(train_dataset.targets) + class_weights = 1. / torch.tensor(class_counts, dtype=torch.float) + sample_weights = class_weights[train_dataset.targets] + criterion = FocalLoss() + # Buat Sampler yang akan menyeimbangkan pengambilan data + sampler = WeightedRandomSampler(weights=sample_weights, num_samples=len(sample_weights), replacement=True) + + # Buat DataLoader training BARU yang menggunakan sampler + train_loader = DataLoader( + train_dataset, + batch_size=BATCH_SIZE, + sampler=sampler, + num_workers=4 # Sesuaikan dengan kemampuan CPU Anda + ) + print("Balanced Sampler berhasil dibuat. Setiap batch training sekarang akan seimbang.") + + # B. Arsitektur & Pretraining + model = create_model(num_classes=NUM_CLASSES).to(DEVICE) + + # C. Menggunakan Focal Loss + # Ganti dari CrossEntropyLoss menjadi FocalLoss + criterion = FocalLoss().to(DEVICE) + print("Menggunakan Focal Loss untuk fokus pada sampel yang sulit.") + + # D. 
Strategi Fine-tune + # --- TAHAP 1: FREEZE BACKBONE, LATIH HEAD --- + print("\n--- TAHAP 1: Melatih Classifier Head ---") + for param in model.parameters(): + param.requires_grad = False + for param in model.head.parameters(): + param.requires_grad = True + + optimizer_head = optim.AdamW(model.head.parameters(), lr=LEARNING_RATE_HEAD, weight_decay=WEIGHT_DECAY) + + for epoch in range(5): + print(f"Epoch Head {epoch+1}/5") + train_one_epoch(model, train_loader, optimizer_head, criterion, DEVICE) + validate_one_epoch(model, valid_loader, criterion, DEVICE) + + # --- TAHAP 2: UNFREEZE & FINE-TUNE SELURUH MODEL --- + print("\n--- TAHAP 2: Fine-tuning Seluruh Model ---") + for param in model.parameters(): + param.requires_grad = True + + optimizer_finetune = optim.AdamW(model.parameters(), lr=LEARNING_RATE_FINETUNE, weight_decay=WEIGHT_DECAY) + scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer_finetune, T_max=EPOCHS, eta_min=1e-7) + + history = {'train_loss': [], 'train_acc': [], 'valid_loss': [], 'valid_acc': []} + best_valid_acc = 0.0 + best_labels, best_preds = None, None + + for epoch in range(EPOCHS): + print(f"Epoch {epoch+1}/{EPOCHS}") + + train_loss, train_acc = train_one_epoch(model, train_loader, optimizer_finetune, criterion, DEVICE) + valid_loss, valid_acc, valid_labels, valid_preds = validate_one_epoch(model, valid_loader, criterion, DEVICE) + + scheduler.step() + + print(f" Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}") + print(f" Valid Loss: {valid_loss:.4f}, Valid Acc: {valid_acc:.4f}") + + history['train_loss'].append(train_loss) + history['train_acc'].append(train_acc) + history['valid_loss'].append(valid_loss) + history['valid_acc'].append(valid_acc) + + if valid_acc > best_valid_acc: + print(f"Validasi akurasi meningkat dari {best_valid_acc:.4f} ke {valid_acc:.4f}. 
Menyimpan model...") + save_model(epoch, model, optimizer_finetune, criterion, f"{OUTPUT_DIR}/{MODEL_NAME}") + best_valid_acc = valid_acc + best_labels = valid_labels + best_preds = valid_preds + + # E. Evaluasi & Reporting + save_plots(history['train_acc'], history['valid_acc'], history['train_loss'], history['valid_loss'], OUTPUT_DIR) + + if best_labels and best_preds: + save_confusion_matrix(best_labels, best_preds, classes, f"{OUTPUT_DIR}/confusion_matrix.png") + + print("\n--- Selesai ---") + print(f"Model terbaik disimpan di {OUTPUT_DIR}/{MODEL_NAME}") \ No newline at end of file diff --git a/.history/src/train_20250927100555.py b/.history/src/train_20250927100555.py new file mode 100644 index 0000000000000000000000000000000000000000..40934a58a6d03f9a4702416b0003e3ef8cb69976 --- /dev/null +++ b/.history/src/train_20250927100555.py @@ -0,0 +1,172 @@ +# src/train.py (Versi Final dengan Balanced Sampler & Focal Loss) + +import torch +import torch.nn as nn +import torch.optim as optim +from tqdm import tqdm +import numpy as np +from sklearn.metrics import classification_report +from torch.utils.data import DataLoader, WeightedRandomSampler +from utils import save_model, save_plots, save_confusion_matrix, FocalLoss + +# Impor dari file lain dalam proyek +from model import create_model +from dataset import get_dataloaders +from utils import save_model, save_plots, save_confusion_matrix, FocalLoss # Pastikan FocalLoss diimpor + +# --- 1. KONFIGURASI & HYPERPARAMETERS --- +DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu' +DATA_DIR = '/data' +OUTPUT_DIR = '../outputs' +IMAGE_SIZE = 224 +BATCH_SIZE = 16 +EPOCHS = 100 +LEARNING_RATE_HEAD = 1e-3 +LEARNING_RATE_FINETUNE = 3e-5 +WEIGHT_DECAY = 0.01 +MODEL_NAME = 'best_vit_model_balanced.pth' +NUM_CLASSES = 4 + +# --- 2. 
FUNGSI TRAINING & VALIDASI (Tidak ada perubahan di sini) --- +def train_one_epoch(model, dataloader, optimizer, criterion, device): + model.train() + running_loss = 0.0 + correct_predictions = 0 + total_samples = 0 + + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Training"): + images, labels = images.to(device), labels.to(device) + optimizer.zero_grad() + outputs = model(images) + loss = criterion(outputs, labels) + loss.backward() + optimizer.step() + + running_loss += loss.item() * images.size(0) + _, preds = torch.max(outputs, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + return epoch_loss, epoch_acc + +def validate_one_epoch(model, dataloader, criterion, device): + model.eval() + running_loss = 0.0 + correct_predictions = 0 + total_samples = 0 + all_preds, all_labels = [], [] + + with torch.no_grad(): + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Validating"): + images, labels = images.to(device), labels.to(device) + outputs = model(images) + loss = criterion(outputs, labels) + + running_loss += loss.item() * images.size(0) + _, preds = torch.max(outputs, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + all_preds.extend(preds.cpu().numpy()) + all_labels.extend(labels.cpu().numpy()) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + + print("\n--- Laporan Klasifikasi Validasi ---") + print(classification_report(all_labels, all_preds, target_names=[str(i) for i in range(NUM_CLASSES)], zero_division=0)) + + return epoch_loss, epoch_acc, all_labels, all_preds + +# --- 3. SCRIPT UTAMA --- +if __name__ == '__main__': + # A. 
Persiapan Data & Implementasi WeightedRandomSampler + # Panggil get_dataloaders untuk mendapatkan dataset latih + _, valid_loader, classes, train_dataset = get_dataloaders(DATA_DIR, BATCH_SIZE, IMAGE_SIZE) + + print("\n--- Menyiapkan Balanced Sampler untuk Training ---") + # Hitung bobot untuk setiap sampel (bukan per kelas) + class_counts = np.bincount(train_dataset.targets) + class_weights = 1. / torch.tensor(class_counts, dtype=torch.float) + sample_weights = class_weights[train_dataset.targets] + criterion = FocalLoss() + # Buat Sampler yang akan menyeimbangkan pengambilan data + sampler = WeightedRandomSampler(weights=sample_weights, num_samples=len(sample_weights), replacement=True) + + # Buat DataLoader training BARU yang menggunakan sampler + train_loader = DataLoader( + train_dataset, + batch_size=BATCH_SIZE, + sampler=sampler, + num_workers=4 # Sesuaikan dengan kemampuan CPU Anda + ) + print("Balanced Sampler berhasil dibuat. Setiap batch training sekarang akan seimbang.") + + # B. Arsitektur & Pretraining + model = create_model(num_classes=NUM_CLASSES).to(DEVICE) + + # C. Menggunakan Focal Loss + # Ganti dari CrossEntropyLoss menjadi FocalLoss + criterion = FocalLoss().to(DEVICE) + print("Menggunakan Focal Loss untuk fokus pada sampel yang sulit.") + + # D. 
Strategi Fine-tune + # --- TAHAP 1: FREEZE BACKBONE, LATIH HEAD --- + print("\n--- TAHAP 1: Melatih Classifier Head ---") + for param in model.parameters(): + param.requires_grad = False + for param in model.head.parameters(): + param.requires_grad = True + + optimizer_head = optim.AdamW(model.head.parameters(), lr=LEARNING_RATE_HEAD, weight_decay=WEIGHT_DECAY) + + for epoch in range(5): + print(f"Epoch Head {epoch+1}/5") + train_one_epoch(model, train_loader, optimizer_head, criterion, DEVICE) + validate_one_epoch(model, valid_loader, criterion, DEVICE) + + # --- TAHAP 2: UNFREEZE & FINE-TUNE SELURUH MODEL --- + print("\n--- TAHAP 2: Fine-tuning Seluruh Model ---") + for param in model.parameters(): + param.requires_grad = True + + optimizer_finetune = optim.AdamW(model.parameters(), lr=LEARNING_RATE_FINETUNE, weight_decay=WEIGHT_DECAY) + scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer_finetune, T_max=EPOCHS, eta_min=1e-7) + + history = {'train_loss': [], 'train_acc': [], 'valid_loss': [], 'valid_acc': []} + best_valid_acc = 0.0 + best_labels, best_preds = None, None + + for epoch in range(EPOCHS): + print(f"Epoch {epoch+1}/{EPOCHS}") + + train_loss, train_acc = train_one_epoch(model, train_loader, optimizer_finetune, criterion, DEVICE) + valid_loss, valid_acc, valid_labels, valid_preds = validate_one_epoch(model, valid_loader, criterion, DEVICE) + + scheduler.step() + + print(f" Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}") + print(f" Valid Loss: {valid_loss:.4f}, Valid Acc: {valid_acc:.4f}") + + history['train_loss'].append(train_loss) + history['train_acc'].append(train_acc) + history['valid_loss'].append(valid_loss) + history['valid_acc'].append(valid_acc) + + if valid_acc > best_valid_acc: + print(f"Validasi akurasi meningkat dari {best_valid_acc:.4f} ke {valid_acc:.4f}. 
Menyimpan model...") + save_model(epoch, model, optimizer_finetune, criterion, f"{OUTPUT_DIR}/{MODEL_NAME}") + best_valid_acc = valid_acc + best_labels = valid_labels + best_preds = valid_preds + + # E. Evaluasi & Reporting + save_plots(history['train_acc'], history['valid_acc'], history['train_loss'], history['valid_loss'], OUTPUT_DIR) + + if best_labels and best_preds: + save_confusion_matrix(best_labels, best_preds, classes, f"{OUTPUT_DIR}/confusion_matrix.png") + + print("\n--- Selesai ---") + print(f"Model terbaik disimpan di {OUTPUT_DIR}/{MODEL_NAME}") \ No newline at end of file diff --git a/.history/src/train_20250927100556.py b/.history/src/train_20250927100556.py new file mode 100644 index 0000000000000000000000000000000000000000..92c54d86dab7fdcdb44b08efa91daf3d90012db0 --- /dev/null +++ b/.history/src/train_20250927100556.py @@ -0,0 +1,172 @@ +# src/train.py (Versi Final dengan Balanced Sampler & Focal Loss) + +import torch +import torch.nn as nn +import torch.optim as optim +from tqdm import tqdm +import numpy as np +from sklearn.metrics import classification_report +from torch.utils.data import DataLoader, WeightedRandomSampler +from utils import save_model, save_plots, save_confusion_matrix, FocalLoss + +# Impor dari file lain dalam proyek +from model import create_model +from dataset import get_dataloaders +from utils import save_model, save_plots, save_confusion_matrix, FocalLoss # Pastikan FocalLoss diimpor + +# --- 1. KONFIGURASI & HYPERPARAMETERS --- +DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu' +DATA_DIR = 'data' +OUTPUT_DIR = '../outputs' +IMAGE_SIZE = 224 +BATCH_SIZE = 16 +EPOCHS = 100 +LEARNING_RATE_HEAD = 1e-3 +LEARNING_RATE_FINETUNE = 3e-5 +WEIGHT_DECAY = 0.01 +MODEL_NAME = 'best_vit_model_balanced.pth' +NUM_CLASSES = 4 + +# --- 2. 
FUNGSI TRAINING & VALIDASI (Tidak ada perubahan di sini) --- +def train_one_epoch(model, dataloader, optimizer, criterion, device): + model.train() + running_loss = 0.0 + correct_predictions = 0 + total_samples = 0 + + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Training"): + images, labels = images.to(device), labels.to(device) + optimizer.zero_grad() + outputs = model(images) + loss = criterion(outputs, labels) + loss.backward() + optimizer.step() + + running_loss += loss.item() * images.size(0) + _, preds = torch.max(outputs, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + return epoch_loss, epoch_acc + +def validate_one_epoch(model, dataloader, criterion, device): + model.eval() + running_loss = 0.0 + correct_predictions = 0 + total_samples = 0 + all_preds, all_labels = [], [] + + with torch.no_grad(): + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Validating"): + images, labels = images.to(device), labels.to(device) + outputs = model(images) + loss = criterion(outputs, labels) + + running_loss += loss.item() * images.size(0) + _, preds = torch.max(outputs, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + all_preds.extend(preds.cpu().numpy()) + all_labels.extend(labels.cpu().numpy()) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + + print("\n--- Laporan Klasifikasi Validasi ---") + print(classification_report(all_labels, all_preds, target_names=[str(i) for i in range(NUM_CLASSES)], zero_division=0)) + + return epoch_loss, epoch_acc, all_labels, all_preds + +# --- 3. SCRIPT UTAMA --- +if __name__ == '__main__': + # A. 
Persiapan Data & Implementasi WeightedRandomSampler + # Panggil get_dataloaders untuk mendapatkan dataset latih + _, valid_loader, classes, train_dataset = get_dataloaders(DATA_DIR, BATCH_SIZE, IMAGE_SIZE) + + print("\n--- Menyiapkan Balanced Sampler untuk Training ---") + # Hitung bobot untuk setiap sampel (bukan per kelas) + class_counts = np.bincount(train_dataset.targets) + class_weights = 1. / torch.tensor(class_counts, dtype=torch.float) + sample_weights = class_weights[train_dataset.targets] + criterion = FocalLoss() + # Buat Sampler yang akan menyeimbangkan pengambilan data + sampler = WeightedRandomSampler(weights=sample_weights, num_samples=len(sample_weights), replacement=True) + + # Buat DataLoader training BARU yang menggunakan sampler + train_loader = DataLoader( + train_dataset, + batch_size=BATCH_SIZE, + sampler=sampler, + num_workers=4 # Sesuaikan dengan kemampuan CPU Anda + ) + print("Balanced Sampler berhasil dibuat. Setiap batch training sekarang akan seimbang.") + + # B. Arsitektur & Pretraining + model = create_model(num_classes=NUM_CLASSES).to(DEVICE) + + # C. Menggunakan Focal Loss + # Ganti dari CrossEntropyLoss menjadi FocalLoss + criterion = FocalLoss().to(DEVICE) + print("Menggunakan Focal Loss untuk fokus pada sampel yang sulit.") + + # D. 
Strategi Fine-tune + # --- TAHAP 1: FREEZE BACKBONE, LATIH HEAD --- + print("\n--- TAHAP 1: Melatih Classifier Head ---") + for param in model.parameters(): + param.requires_grad = False + for param in model.head.parameters(): + param.requires_grad = True + + optimizer_head = optim.AdamW(model.head.parameters(), lr=LEARNING_RATE_HEAD, weight_decay=WEIGHT_DECAY) + + for epoch in range(5): + print(f"Epoch Head {epoch+1}/5") + train_one_epoch(model, train_loader, optimizer_head, criterion, DEVICE) + validate_one_epoch(model, valid_loader, criterion, DEVICE) + + # --- TAHAP 2: UNFREEZE & FINE-TUNE SELURUH MODEL --- + print("\n--- TAHAP 2: Fine-tuning Seluruh Model ---") + for param in model.parameters(): + param.requires_grad = True + + optimizer_finetune = optim.AdamW(model.parameters(), lr=LEARNING_RATE_FINETUNE, weight_decay=WEIGHT_DECAY) + scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer_finetune, T_max=EPOCHS, eta_min=1e-7) + + history = {'train_loss': [], 'train_acc': [], 'valid_loss': [], 'valid_acc': []} + best_valid_acc = 0.0 + best_labels, best_preds = None, None + + for epoch in range(EPOCHS): + print(f"Epoch {epoch+1}/{EPOCHS}") + + train_loss, train_acc = train_one_epoch(model, train_loader, optimizer_finetune, criterion, DEVICE) + valid_loss, valid_acc, valid_labels, valid_preds = validate_one_epoch(model, valid_loader, criterion, DEVICE) + + scheduler.step() + + print(f" Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}") + print(f" Valid Loss: {valid_loss:.4f}, Valid Acc: {valid_acc:.4f}") + + history['train_loss'].append(train_loss) + history['train_acc'].append(train_acc) + history['valid_loss'].append(valid_loss) + history['valid_acc'].append(valid_acc) + + if valid_acc > best_valid_acc: + print(f"Validasi akurasi meningkat dari {best_valid_acc:.4f} ke {valid_acc:.4f}. 
Menyimpan model...") + save_model(epoch, model, optimizer_finetune, criterion, f"{OUTPUT_DIR}/{MODEL_NAME}") + best_valid_acc = valid_acc + best_labels = valid_labels + best_preds = valid_preds + + # E. Evaluasi & Reporting + save_plots(history['train_acc'], history['valid_acc'], history['train_loss'], history['valid_loss'], OUTPUT_DIR) + + if best_labels and best_preds: + save_confusion_matrix(best_labels, best_preds, classes, f"{OUTPUT_DIR}/confusion_matrix.png") + + print("\n--- Selesai ---") + print(f"Model terbaik disimpan di {OUTPUT_DIR}/{MODEL_NAME}") \ No newline at end of file diff --git a/.history/src/train_20250927100558.py b/.history/src/train_20250927100558.py new file mode 100644 index 0000000000000000000000000000000000000000..d284c29737c15adc698a6ae1f164f1222332f219 --- /dev/null +++ b/.history/src/train_20250927100558.py @@ -0,0 +1,172 @@ +# src/train.py (Versi Final dengan Balanced Sampler & Focal Loss) + +import torch +import torch.nn as nn +import torch.optim as optim +from tqdm import tqdm +import numpy as np +from sklearn.metrics import classification_report +from torch.utils.data import DataLoader, WeightedRandomSampler +from utils import save_model, save_plots, save_confusion_matrix, FocalLoss + +# Impor dari file lain dalam proyek +from model import create_model +from dataset import get_dataloaders +from utils import save_model, save_plots, save_confusion_matrix, FocalLoss # Pastikan FocalLoss diimpor + +# --- 1. KONFIGURASI & HYPERPARAMETERS --- +DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu' +DATA_DIR = 'data' +OUTPUT_DIR = '/outputs' +IMAGE_SIZE = 224 +BATCH_SIZE = 16 +EPOCHS = 100 +LEARNING_RATE_HEAD = 1e-3 +LEARNING_RATE_FINETUNE = 3e-5 +WEIGHT_DECAY = 0.01 +MODEL_NAME = 'best_vit_model_balanced.pth' +NUM_CLASSES = 4 + +# --- 2. 
FUNGSI TRAINING & VALIDASI (Tidak ada perubahan di sini) --- +def train_one_epoch(model, dataloader, optimizer, criterion, device): + model.train() + running_loss = 0.0 + correct_predictions = 0 + total_samples = 0 + + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Training"): + images, labels = images.to(device), labels.to(device) + optimizer.zero_grad() + outputs = model(images) + loss = criterion(outputs, labels) + loss.backward() + optimizer.step() + + running_loss += loss.item() * images.size(0) + _, preds = torch.max(outputs, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + return epoch_loss, epoch_acc + +def validate_one_epoch(model, dataloader, criterion, device): + model.eval() + running_loss = 0.0 + correct_predictions = 0 + total_samples = 0 + all_preds, all_labels = [], [] + + with torch.no_grad(): + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Validating"): + images, labels = images.to(device), labels.to(device) + outputs = model(images) + loss = criterion(outputs, labels) + + running_loss += loss.item() * images.size(0) + _, preds = torch.max(outputs, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + all_preds.extend(preds.cpu().numpy()) + all_labels.extend(labels.cpu().numpy()) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + + print("\n--- Laporan Klasifikasi Validasi ---") + print(classification_report(all_labels, all_preds, target_names=[str(i) for i in range(NUM_CLASSES)], zero_division=0)) + + return epoch_loss, epoch_acc, all_labels, all_preds + +# --- 3. SCRIPT UTAMA --- +if __name__ == '__main__': + # A. 
Persiapan Data & Implementasi WeightedRandomSampler + # Panggil get_dataloaders untuk mendapatkan dataset latih + _, valid_loader, classes, train_dataset = get_dataloaders(DATA_DIR, BATCH_SIZE, IMAGE_SIZE) + + print("\n--- Menyiapkan Balanced Sampler untuk Training ---") + # Hitung bobot untuk setiap sampel (bukan per kelas) + class_counts = np.bincount(train_dataset.targets) + class_weights = 1. / torch.tensor(class_counts, dtype=torch.float) + sample_weights = class_weights[train_dataset.targets] + criterion = FocalLoss() + # Buat Sampler yang akan menyeimbangkan pengambilan data + sampler = WeightedRandomSampler(weights=sample_weights, num_samples=len(sample_weights), replacement=True) + + # Buat DataLoader training BARU yang menggunakan sampler + train_loader = DataLoader( + train_dataset, + batch_size=BATCH_SIZE, + sampler=sampler, + num_workers=4 # Sesuaikan dengan kemampuan CPU Anda + ) + print("Balanced Sampler berhasil dibuat. Setiap batch training sekarang akan seimbang.") + + # B. Arsitektur & Pretraining + model = create_model(num_classes=NUM_CLASSES).to(DEVICE) + + # C. Menggunakan Focal Loss + # Ganti dari CrossEntropyLoss menjadi FocalLoss + criterion = FocalLoss().to(DEVICE) + print("Menggunakan Focal Loss untuk fokus pada sampel yang sulit.") + + # D. 
Strategi Fine-tune + # --- TAHAP 1: FREEZE BACKBONE, LATIH HEAD --- + print("\n--- TAHAP 1: Melatih Classifier Head ---") + for param in model.parameters(): + param.requires_grad = False + for param in model.head.parameters(): + param.requires_grad = True + + optimizer_head = optim.AdamW(model.head.parameters(), lr=LEARNING_RATE_HEAD, weight_decay=WEIGHT_DECAY) + + for epoch in range(5): + print(f"Epoch Head {epoch+1}/5") + train_one_epoch(model, train_loader, optimizer_head, criterion, DEVICE) + validate_one_epoch(model, valid_loader, criterion, DEVICE) + + # --- TAHAP 2: UNFREEZE & FINE-TUNE SELURUH MODEL --- + print("\n--- TAHAP 2: Fine-tuning Seluruh Model ---") + for param in model.parameters(): + param.requires_grad = True + + optimizer_finetune = optim.AdamW(model.parameters(), lr=LEARNING_RATE_FINETUNE, weight_decay=WEIGHT_DECAY) + scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer_finetune, T_max=EPOCHS, eta_min=1e-7) + + history = {'train_loss': [], 'train_acc': [], 'valid_loss': [], 'valid_acc': []} + best_valid_acc = 0.0 + best_labels, best_preds = None, None + + for epoch in range(EPOCHS): + print(f"Epoch {epoch+1}/{EPOCHS}") + + train_loss, train_acc = train_one_epoch(model, train_loader, optimizer_finetune, criterion, DEVICE) + valid_loss, valid_acc, valid_labels, valid_preds = validate_one_epoch(model, valid_loader, criterion, DEVICE) + + scheduler.step() + + print(f" Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}") + print(f" Valid Loss: {valid_loss:.4f}, Valid Acc: {valid_acc:.4f}") + + history['train_loss'].append(train_loss) + history['train_acc'].append(train_acc) + history['valid_loss'].append(valid_loss) + history['valid_acc'].append(valid_acc) + + if valid_acc > best_valid_acc: + print(f"Validasi akurasi meningkat dari {best_valid_acc:.4f} ke {valid_acc:.4f}. 
Menyimpan model...") + save_model(epoch, model, optimizer_finetune, criterion, f"{OUTPUT_DIR}/{MODEL_NAME}") + best_valid_acc = valid_acc + best_labels = valid_labels + best_preds = valid_preds + + # E. Evaluasi & Reporting + save_plots(history['train_acc'], history['valid_acc'], history['train_loss'], history['valid_loss'], OUTPUT_DIR) + + if best_labels and best_preds: + save_confusion_matrix(best_labels, best_preds, classes, f"{OUTPUT_DIR}/confusion_matrix.png") + + print("\n--- Selesai ---") + print(f"Model terbaik disimpan di {OUTPUT_DIR}/{MODEL_NAME}") \ No newline at end of file diff --git a/.history/src/train_20250927100559.py b/.history/src/train_20250927100559.py new file mode 100644 index 0000000000000000000000000000000000000000..f2f44f7905e860634894b2205957e5d159d2cea0 --- /dev/null +++ b/.history/src/train_20250927100559.py @@ -0,0 +1,172 @@ +# src/train.py (Versi Final dengan Balanced Sampler & Focal Loss) + +import torch +import torch.nn as nn +import torch.optim as optim +from tqdm import tqdm +import numpy as np +from sklearn.metrics import classification_report +from torch.utils.data import DataLoader, WeightedRandomSampler +from utils import save_model, save_plots, save_confusion_matrix, FocalLoss + +# Impor dari file lain dalam proyek +from model import create_model +from dataset import get_dataloaders +from utils import save_model, save_plots, save_confusion_matrix, FocalLoss # Pastikan FocalLoss diimpor + +# --- 1. KONFIGURASI & HYPERPARAMETERS --- +DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu' +DATA_DIR = 'data' +OUTPUT_DIR = 'outputs' +IMAGE_SIZE = 224 +BATCH_SIZE = 16 +EPOCHS = 100 +LEARNING_RATE_HEAD = 1e-3 +LEARNING_RATE_FINETUNE = 3e-5 +WEIGHT_DECAY = 0.01 +MODEL_NAME = 'best_vit_model_balanced.pth' +NUM_CLASSES = 4 + +# --- 2. 
FUNGSI TRAINING & VALIDASI (Tidak ada perubahan di sini) --- +def train_one_epoch(model, dataloader, optimizer, criterion, device): + model.train() + running_loss = 0.0 + correct_predictions = 0 + total_samples = 0 + + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Training"): + images, labels = images.to(device), labels.to(device) + optimizer.zero_grad() + outputs = model(images) + loss = criterion(outputs, labels) + loss.backward() + optimizer.step() + + running_loss += loss.item() * images.size(0) + _, preds = torch.max(outputs, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + return epoch_loss, epoch_acc + +def validate_one_epoch(model, dataloader, criterion, device): + model.eval() + running_loss = 0.0 + correct_predictions = 0 + total_samples = 0 + all_preds, all_labels = [], [] + + with torch.no_grad(): + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Validating"): + images, labels = images.to(device), labels.to(device) + outputs = model(images) + loss = criterion(outputs, labels) + + running_loss += loss.item() * images.size(0) + _, preds = torch.max(outputs, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + all_preds.extend(preds.cpu().numpy()) + all_labels.extend(labels.cpu().numpy()) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + + print("\n--- Laporan Klasifikasi Validasi ---") + print(classification_report(all_labels, all_preds, target_names=[str(i) for i in range(NUM_CLASSES)], zero_division=0)) + + return epoch_loss, epoch_acc, all_labels, all_preds + +# --- 3. SCRIPT UTAMA --- +if __name__ == '__main__': + # A. 
Persiapan Data & Implementasi WeightedRandomSampler + # Panggil get_dataloaders untuk mendapatkan dataset latih + _, valid_loader, classes, train_dataset = get_dataloaders(DATA_DIR, BATCH_SIZE, IMAGE_SIZE) + + print("\n--- Menyiapkan Balanced Sampler untuk Training ---") + # Hitung bobot untuk setiap sampel (bukan per kelas) + class_counts = np.bincount(train_dataset.targets) + class_weights = 1. / torch.tensor(class_counts, dtype=torch.float) + sample_weights = class_weights[train_dataset.targets] + criterion = FocalLoss() + # Buat Sampler yang akan menyeimbangkan pengambilan data + sampler = WeightedRandomSampler(weights=sample_weights, num_samples=len(sample_weights), replacement=True) + + # Buat DataLoader training BARU yang menggunakan sampler + train_loader = DataLoader( + train_dataset, + batch_size=BATCH_SIZE, + sampler=sampler, + num_workers=4 # Sesuaikan dengan kemampuan CPU Anda + ) + print("Balanced Sampler berhasil dibuat. Setiap batch training sekarang akan seimbang.") + + # B. Arsitektur & Pretraining + model = create_model(num_classes=NUM_CLASSES).to(DEVICE) + + # C. Menggunakan Focal Loss + # Ganti dari CrossEntropyLoss menjadi FocalLoss + criterion = FocalLoss().to(DEVICE) + print("Menggunakan Focal Loss untuk fokus pada sampel yang sulit.") + + # D. 
Strategi Fine-tune + # --- TAHAP 1: FREEZE BACKBONE, LATIH HEAD --- + print("\n--- TAHAP 1: Melatih Classifier Head ---") + for param in model.parameters(): + param.requires_grad = False + for param in model.head.parameters(): + param.requires_grad = True + + optimizer_head = optim.AdamW(model.head.parameters(), lr=LEARNING_RATE_HEAD, weight_decay=WEIGHT_DECAY) + + for epoch in range(5): + print(f"Epoch Head {epoch+1}/5") + train_one_epoch(model, train_loader, optimizer_head, criterion, DEVICE) + validate_one_epoch(model, valid_loader, criterion, DEVICE) + + # --- TAHAP 2: UNFREEZE & FINE-TUNE SELURUH MODEL --- + print("\n--- TAHAP 2: Fine-tuning Seluruh Model ---") + for param in model.parameters(): + param.requires_grad = True + + optimizer_finetune = optim.AdamW(model.parameters(), lr=LEARNING_RATE_FINETUNE, weight_decay=WEIGHT_DECAY) + scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer_finetune, T_max=EPOCHS, eta_min=1e-7) + + history = {'train_loss': [], 'train_acc': [], 'valid_loss': [], 'valid_acc': []} + best_valid_acc = 0.0 + best_labels, best_preds = None, None + + for epoch in range(EPOCHS): + print(f"Epoch {epoch+1}/{EPOCHS}") + + train_loss, train_acc = train_one_epoch(model, train_loader, optimizer_finetune, criterion, DEVICE) + valid_loss, valid_acc, valid_labels, valid_preds = validate_one_epoch(model, valid_loader, criterion, DEVICE) + + scheduler.step() + + print(f" Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}") + print(f" Valid Loss: {valid_loss:.4f}, Valid Acc: {valid_acc:.4f}") + + history['train_loss'].append(train_loss) + history['train_acc'].append(train_acc) + history['valid_loss'].append(valid_loss) + history['valid_acc'].append(valid_acc) + + if valid_acc > best_valid_acc: + print(f"Validasi akurasi meningkat dari {best_valid_acc:.4f} ke {valid_acc:.4f}. 
Menyimpan model...") + save_model(epoch, model, optimizer_finetune, criterion, f"{OUTPUT_DIR}/{MODEL_NAME}") + best_valid_acc = valid_acc + best_labels = valid_labels + best_preds = valid_preds + + # E. Evaluasi & Reporting + save_plots(history['train_acc'], history['valid_acc'], history['train_loss'], history['valid_loss'], OUTPUT_DIR) + + if best_labels and best_preds: + save_confusion_matrix(best_labels, best_preds, classes, f"{OUTPUT_DIR}/confusion_matrix.png") + + print("\n--- Selesai ---") + print(f"Model terbaik disimpan di {OUTPUT_DIR}/{MODEL_NAME}") \ No newline at end of file diff --git a/.history/src/train_20250927100603.py b/.history/src/train_20250927100603.py new file mode 100644 index 0000000000000000000000000000000000000000..a2d19e8190467d32bf4888ba9cc6c51b32aa98a7 --- /dev/null +++ b/.history/src/train_20250927100603.py @@ -0,0 +1,172 @@ +# src/train.py (Versi Final dengan Balanced Sampler & Focal Loss) + +import torch +import torch.nn as nn +import torch.optim as optim +from tqdm import tqdm +import numpy as np +from sklearn.metrics import classification_report +from torch.utils.data import DataLoader, WeightedRandomSampler +from utils import save_model, save_plots, save_confusion_matrix, FocalLoss + +# Impor dari file lain dalam proyek +from model import create_model +from dataset import get_dataloaders +from utils import save_model, save_plots, save_confusion_matrix, FocalLoss # Pastikan FocalLoss diimpor + +# --- 1. KONFIGURASI & HYPERPARAMETERS --- +DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu' +DATA_DIR = 'data' +OUTPUT_DIR = 'outputs.models' +IMAGE_SIZE = 224 +BATCH_SIZE = 16 +EPOCHS = 100 +LEARNING_RATE_HEAD = 1e-3 +LEARNING_RATE_FINETUNE = 3e-5 +WEIGHT_DECAY = 0.01 +MODEL_NAME = 'best_vit_model_balanced.pth' +NUM_CLASSES = 4 + +# --- 2. 
FUNGSI TRAINING & VALIDASI (Tidak ada perubahan di sini) --- +def train_one_epoch(model, dataloader, optimizer, criterion, device): + model.train() + running_loss = 0.0 + correct_predictions = 0 + total_samples = 0 + + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Training"): + images, labels = images.to(device), labels.to(device) + optimizer.zero_grad() + outputs = model(images) + loss = criterion(outputs, labels) + loss.backward() + optimizer.step() + + running_loss += loss.item() * images.size(0) + _, preds = torch.max(outputs, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + return epoch_loss, epoch_acc + +def validate_one_epoch(model, dataloader, criterion, device): + model.eval() + running_loss = 0.0 + correct_predictions = 0 + total_samples = 0 + all_preds, all_labels = [], [] + + with torch.no_grad(): + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Validating"): + images, labels = images.to(device), labels.to(device) + outputs = model(images) + loss = criterion(outputs, labels) + + running_loss += loss.item() * images.size(0) + _, preds = torch.max(outputs, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + all_preds.extend(preds.cpu().numpy()) + all_labels.extend(labels.cpu().numpy()) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + + print("\n--- Laporan Klasifikasi Validasi ---") + print(classification_report(all_labels, all_preds, target_names=[str(i) for i in range(NUM_CLASSES)], zero_division=0)) + + return epoch_loss, epoch_acc, all_labels, all_preds + +# --- 3. SCRIPT UTAMA --- +if __name__ == '__main__': + # A. 
Persiapan Data & Implementasi WeightedRandomSampler + # Panggil get_dataloaders untuk mendapatkan dataset latih + _, valid_loader, classes, train_dataset = get_dataloaders(DATA_DIR, BATCH_SIZE, IMAGE_SIZE) + + print("\n--- Menyiapkan Balanced Sampler untuk Training ---") + # Hitung bobot untuk setiap sampel (bukan per kelas) + class_counts = np.bincount(train_dataset.targets) + class_weights = 1. / torch.tensor(class_counts, dtype=torch.float) + sample_weights = class_weights[train_dataset.targets] + criterion = FocalLoss() + # Buat Sampler yang akan menyeimbangkan pengambilan data + sampler = WeightedRandomSampler(weights=sample_weights, num_samples=len(sample_weights), replacement=True) + + # Buat DataLoader training BARU yang menggunakan sampler + train_loader = DataLoader( + train_dataset, + batch_size=BATCH_SIZE, + sampler=sampler, + num_workers=4 # Sesuaikan dengan kemampuan CPU Anda + ) + print("Balanced Sampler berhasil dibuat. Setiap batch training sekarang akan seimbang.") + + # B. Arsitektur & Pretraining + model = create_model(num_classes=NUM_CLASSES).to(DEVICE) + + # C. Menggunakan Focal Loss + # Ganti dari CrossEntropyLoss menjadi FocalLoss + criterion = FocalLoss().to(DEVICE) + print("Menggunakan Focal Loss untuk fokus pada sampel yang sulit.") + + # D. 
Strategi Fine-tune + # --- TAHAP 1: FREEZE BACKBONE, LATIH HEAD --- + print("\n--- TAHAP 1: Melatih Classifier Head ---") + for param in model.parameters(): + param.requires_grad = False + for param in model.head.parameters(): + param.requires_grad = True + + optimizer_head = optim.AdamW(model.head.parameters(), lr=LEARNING_RATE_HEAD, weight_decay=WEIGHT_DECAY) + + for epoch in range(5): + print(f"Epoch Head {epoch+1}/5") + train_one_epoch(model, train_loader, optimizer_head, criterion, DEVICE) + validate_one_epoch(model, valid_loader, criterion, DEVICE) + + # --- TAHAP 2: UNFREEZE & FINE-TUNE SELURUH MODEL --- + print("\n--- TAHAP 2: Fine-tuning Seluruh Model ---") + for param in model.parameters(): + param.requires_grad = True + + optimizer_finetune = optim.AdamW(model.parameters(), lr=LEARNING_RATE_FINETUNE, weight_decay=WEIGHT_DECAY) + scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer_finetune, T_max=EPOCHS, eta_min=1e-7) + + history = {'train_loss': [], 'train_acc': [], 'valid_loss': [], 'valid_acc': []} + best_valid_acc = 0.0 + best_labels, best_preds = None, None + + for epoch in range(EPOCHS): + print(f"Epoch {epoch+1}/{EPOCHS}") + + train_loss, train_acc = train_one_epoch(model, train_loader, optimizer_finetune, criterion, DEVICE) + valid_loss, valid_acc, valid_labels, valid_preds = validate_one_epoch(model, valid_loader, criterion, DEVICE) + + scheduler.step() + + print(f" Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}") + print(f" Valid Loss: {valid_loss:.4f}, Valid Acc: {valid_acc:.4f}") + + history['train_loss'].append(train_loss) + history['train_acc'].append(train_acc) + history['valid_loss'].append(valid_loss) + history['valid_acc'].append(valid_acc) + + if valid_acc > best_valid_acc: + print(f"Validasi akurasi meningkat dari {best_valid_acc:.4f} ke {valid_acc:.4f}. 
Menyimpan model...") + save_model(epoch, model, optimizer_finetune, criterion, f"{OUTPUT_DIR}/{MODEL_NAME}") + best_valid_acc = valid_acc + best_labels = valid_labels + best_preds = valid_preds + + # E. Evaluasi & Reporting + save_plots(history['train_acc'], history['valid_acc'], history['train_loss'], history['valid_loss'], OUTPUT_DIR) + + if best_labels and best_preds: + save_confusion_matrix(best_labels, best_preds, classes, f"{OUTPUT_DIR}/confusion_matrix.png") + + print("\n--- Selesai ---") + print(f"Model terbaik disimpan di {OUTPUT_DIR}/{MODEL_NAME}") \ No newline at end of file diff --git a/.history/src/train_20250927100607.py b/.history/src/train_20250927100607.py new file mode 100644 index 0000000000000000000000000000000000000000..3a6357d0fa3de001237b7542bce7fc7e071fe8e5 --- /dev/null +++ b/.history/src/train_20250927100607.py @@ -0,0 +1,172 @@ +# src/train.py (Versi Final dengan Balanced Sampler & Focal Loss) + +import torch +import torch.nn as nn +import torch.optim as optim +from tqdm import tqdm +import numpy as np +from sklearn.metrics import classification_report +from torch.utils.data import DataLoader, WeightedRandomSampler +from utils import save_model, save_plots, save_confusion_matrix, FocalLoss + +# Impor dari file lain dalam proyek +from model import create_model +from dataset import get_dataloaders +from utils import save_model, save_plots, save_confusion_matrix, FocalLoss # Pastikan FocalLoss diimpor + +# --- 1. KONFIGURASI & HYPERPARAMETERS --- +DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu' +DATA_DIR = 'data' +OUTPUT_DIR = 'outputs/models' +IMAGE_SIZE = 224 +BATCH_SIZE = 16 +EPOCHS = 100 +LEARNING_RATE_HEAD = 1e-3 +LEARNING_RATE_FINETUNE = 3e-5 +WEIGHT_DECAY = 0.01 +MODEL_NAME = 'best_vit_model_balanced.pth' +NUM_CLASSES = 4 + +# --- 2. 
FUNGSI TRAINING & VALIDASI (Tidak ada perubahan di sini) --- +def train_one_epoch(model, dataloader, optimizer, criterion, device): + model.train() + running_loss = 0.0 + correct_predictions = 0 + total_samples = 0 + + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Training"): + images, labels = images.to(device), labels.to(device) + optimizer.zero_grad() + outputs = model(images) + loss = criterion(outputs, labels) + loss.backward() + optimizer.step() + + running_loss += loss.item() * images.size(0) + _, preds = torch.max(outputs, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + return epoch_loss, epoch_acc + +def validate_one_epoch(model, dataloader, criterion, device): + model.eval() + running_loss = 0.0 + correct_predictions = 0 + total_samples = 0 + all_preds, all_labels = [], [] + + with torch.no_grad(): + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Validating"): + images, labels = images.to(device), labels.to(device) + outputs = model(images) + loss = criterion(outputs, labels) + + running_loss += loss.item() * images.size(0) + _, preds = torch.max(outputs, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + all_preds.extend(preds.cpu().numpy()) + all_labels.extend(labels.cpu().numpy()) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + + print("\n--- Laporan Klasifikasi Validasi ---") + print(classification_report(all_labels, all_preds, target_names=[str(i) for i in range(NUM_CLASSES)], zero_division=0)) + + return epoch_loss, epoch_acc, all_labels, all_preds + +# --- 3. SCRIPT UTAMA --- +if __name__ == '__main__': + # A. 
Persiapan Data & Implementasi WeightedRandomSampler + # Panggil get_dataloaders untuk mendapatkan dataset latih + _, valid_loader, classes, train_dataset = get_dataloaders(DATA_DIR, BATCH_SIZE, IMAGE_SIZE) + + print("\n--- Menyiapkan Balanced Sampler untuk Training ---") + # Hitung bobot untuk setiap sampel (bukan per kelas) + class_counts = np.bincount(train_dataset.targets) + class_weights = 1. / torch.tensor(class_counts, dtype=torch.float) + sample_weights = class_weights[train_dataset.targets] + criterion = FocalLoss() + # Buat Sampler yang akan menyeimbangkan pengambilan data + sampler = WeightedRandomSampler(weights=sample_weights, num_samples=len(sample_weights), replacement=True) + + # Buat DataLoader training BARU yang menggunakan sampler + train_loader = DataLoader( + train_dataset, + batch_size=BATCH_SIZE, + sampler=sampler, + num_workers=4 # Sesuaikan dengan kemampuan CPU Anda + ) + print("Balanced Sampler berhasil dibuat. Setiap batch training sekarang akan seimbang.") + + # B. Arsitektur & Pretraining + model = create_model(num_classes=NUM_CLASSES).to(DEVICE) + + # C. Menggunakan Focal Loss + # Ganti dari CrossEntropyLoss menjadi FocalLoss + criterion = FocalLoss().to(DEVICE) + print("Menggunakan Focal Loss untuk fokus pada sampel yang sulit.") + + # D. 
Strategi Fine-tune + # --- TAHAP 1: FREEZE BACKBONE, LATIH HEAD --- + print("\n--- TAHAP 1: Melatih Classifier Head ---") + for param in model.parameters(): + param.requires_grad = False + for param in model.head.parameters(): + param.requires_grad = True + + optimizer_head = optim.AdamW(model.head.parameters(), lr=LEARNING_RATE_HEAD, weight_decay=WEIGHT_DECAY) + + for epoch in range(5): + print(f"Epoch Head {epoch+1}/5") + train_one_epoch(model, train_loader, optimizer_head, criterion, DEVICE) + validate_one_epoch(model, valid_loader, criterion, DEVICE) + + # --- TAHAP 2: UNFREEZE & FINE-TUNE SELURUH MODEL --- + print("\n--- TAHAP 2: Fine-tuning Seluruh Model ---") + for param in model.parameters(): + param.requires_grad = True + + optimizer_finetune = optim.AdamW(model.parameters(), lr=LEARNING_RATE_FINETUNE, weight_decay=WEIGHT_DECAY) + scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer_finetune, T_max=EPOCHS, eta_min=1e-7) + + history = {'train_loss': [], 'train_acc': [], 'valid_loss': [], 'valid_acc': []} + best_valid_acc = 0.0 + best_labels, best_preds = None, None + + for epoch in range(EPOCHS): + print(f"Epoch {epoch+1}/{EPOCHS}") + + train_loss, train_acc = train_one_epoch(model, train_loader, optimizer_finetune, criterion, DEVICE) + valid_loss, valid_acc, valid_labels, valid_preds = validate_one_epoch(model, valid_loader, criterion, DEVICE) + + scheduler.step() + + print(f" Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}") + print(f" Valid Loss: {valid_loss:.4f}, Valid Acc: {valid_acc:.4f}") + + history['train_loss'].append(train_loss) + history['train_acc'].append(train_acc) + history['valid_loss'].append(valid_loss) + history['valid_acc'].append(valid_acc) + + if valid_acc > best_valid_acc: + print(f"Validasi akurasi meningkat dari {best_valid_acc:.4f} ke {valid_acc:.4f}. 
Menyimpan model...") + save_model(epoch, model, optimizer_finetune, criterion, f"{OUTPUT_DIR}/{MODEL_NAME}") + best_valid_acc = valid_acc + best_labels = valid_labels + best_preds = valid_preds + + # E. Evaluasi & Reporting + save_plots(history['train_acc'], history['valid_acc'], history['train_loss'], history['valid_loss'], OUTPUT_DIR) + + if best_labels and best_preds: + save_confusion_matrix(best_labels, best_preds, classes, f"{OUTPUT_DIR}/confusion_matrix.png") + + print("\n--- Selesai ---") + print(f"Model terbaik disimpan di {OUTPUT_DIR}/{MODEL_NAME}") \ No newline at end of file diff --git a/.history/src/train_20250927100611.py b/.history/src/train_20250927100611.py new file mode 100644 index 0000000000000000000000000000000000000000..dee15c3377046e5df11a5cd136b1a9af42c89076 --- /dev/null +++ b/.history/src/train_20250927100611.py @@ -0,0 +1,172 @@ +# src/train.py (Versi Final dengan Balanced Sampler & Focal Loss) + +import torch +import torch.nn as nn +import torch.optim as optim +from tqdm import tqdm +import numpy as np +from sklearn.metrics import classification_report +from torch.utils.data import DataLoader, WeightedRandomSampler +from utils import save_model, save_plots, save_confusion_matrix, FocalLoss + +# Impor dari file lain dalam proyek +from model import create_model +from dataset import get_dataloaders +from utils import save_model, save_plots, save_confusion_matrix, FocalLoss # Pastikan FocalLoss diimpor + +# --- 1. KONFIGURASI & HYPERPARAMETERS --- +DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu' +DATA_DIR = 'data' +OUTPUT_DIR = 'outputs/models' +IMAGE_SIZE = 224 +BATCH_SIZE = 16 +EPOCHS = 0 +LEARNING_RATE_HEAD = 1e-3 +LEARNING_RATE_FINETUNE = 3e-5 +WEIGHT_DECAY = 0.01 +MODEL_NAME = 'best_vit_model_balanced.pth' +NUM_CLASSES = 4 + +# --- 2. 
FUNGSI TRAINING & VALIDASI (Tidak ada perubahan di sini) --- +def train_one_epoch(model, dataloader, optimizer, criterion, device): + model.train() + running_loss = 0.0 + correct_predictions = 0 + total_samples = 0 + + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Training"): + images, labels = images.to(device), labels.to(device) + optimizer.zero_grad() + outputs = model(images) + loss = criterion(outputs, labels) + loss.backward() + optimizer.step() + + running_loss += loss.item() * images.size(0) + _, preds = torch.max(outputs, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + return epoch_loss, epoch_acc + +def validate_one_epoch(model, dataloader, criterion, device): + model.eval() + running_loss = 0.0 + correct_predictions = 0 + total_samples = 0 + all_preds, all_labels = [], [] + + with torch.no_grad(): + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Validating"): + images, labels = images.to(device), labels.to(device) + outputs = model(images) + loss = criterion(outputs, labels) + + running_loss += loss.item() * images.size(0) + _, preds = torch.max(outputs, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + all_preds.extend(preds.cpu().numpy()) + all_labels.extend(labels.cpu().numpy()) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + + print("\n--- Laporan Klasifikasi Validasi ---") + print(classification_report(all_labels, all_preds, target_names=[str(i) for i in range(NUM_CLASSES)], zero_division=0)) + + return epoch_loss, epoch_acc, all_labels, all_preds + +# --- 3. SCRIPT UTAMA --- +if __name__ == '__main__': + # A. 
Persiapan Data & Implementasi WeightedRandomSampler + # Panggil get_dataloaders untuk mendapatkan dataset latih + _, valid_loader, classes, train_dataset = get_dataloaders(DATA_DIR, BATCH_SIZE, IMAGE_SIZE) + + print("\n--- Menyiapkan Balanced Sampler untuk Training ---") + # Hitung bobot untuk setiap sampel (bukan per kelas) + class_counts = np.bincount(train_dataset.targets) + class_weights = 1. / torch.tensor(class_counts, dtype=torch.float) + sample_weights = class_weights[train_dataset.targets] + criterion = FocalLoss() + # Buat Sampler yang akan menyeimbangkan pengambilan data + sampler = WeightedRandomSampler(weights=sample_weights, num_samples=len(sample_weights), replacement=True) + + # Buat DataLoader training BARU yang menggunakan sampler + train_loader = DataLoader( + train_dataset, + batch_size=BATCH_SIZE, + sampler=sampler, + num_workers=4 # Sesuaikan dengan kemampuan CPU Anda + ) + print("Balanced Sampler berhasil dibuat. Setiap batch training sekarang akan seimbang.") + + # B. Arsitektur & Pretraining + model = create_model(num_classes=NUM_CLASSES).to(DEVICE) + + # C. Menggunakan Focal Loss + # Ganti dari CrossEntropyLoss menjadi FocalLoss + criterion = FocalLoss().to(DEVICE) + print("Menggunakan Focal Loss untuk fokus pada sampel yang sulit.") + + # D. 
Strategi Fine-tune + # --- TAHAP 1: FREEZE BACKBONE, LATIH HEAD --- + print("\n--- TAHAP 1: Melatih Classifier Head ---") + for param in model.parameters(): + param.requires_grad = False + for param in model.head.parameters(): + param.requires_grad = True + + optimizer_head = optim.AdamW(model.head.parameters(), lr=LEARNING_RATE_HEAD, weight_decay=WEIGHT_DECAY) + + for epoch in range(5): + print(f"Epoch Head {epoch+1}/5") + train_one_epoch(model, train_loader, optimizer_head, criterion, DEVICE) + validate_one_epoch(model, valid_loader, criterion, DEVICE) + + # --- TAHAP 2: UNFREEZE & FINE-TUNE SELURUH MODEL --- + print("\n--- TAHAP 2: Fine-tuning Seluruh Model ---") + for param in model.parameters(): + param.requires_grad = True + + optimizer_finetune = optim.AdamW(model.parameters(), lr=LEARNING_RATE_FINETUNE, weight_decay=WEIGHT_DECAY) + scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer_finetune, T_max=EPOCHS, eta_min=1e-7) + + history = {'train_loss': [], 'train_acc': [], 'valid_loss': [], 'valid_acc': []} + best_valid_acc = 0.0 + best_labels, best_preds = None, None + + for epoch in range(EPOCHS): + print(f"Epoch {epoch+1}/{EPOCHS}") + + train_loss, train_acc = train_one_epoch(model, train_loader, optimizer_finetune, criterion, DEVICE) + valid_loss, valid_acc, valid_labels, valid_preds = validate_one_epoch(model, valid_loader, criterion, DEVICE) + + scheduler.step() + + print(f" Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}") + print(f" Valid Loss: {valid_loss:.4f}, Valid Acc: {valid_acc:.4f}") + + history['train_loss'].append(train_loss) + history['train_acc'].append(train_acc) + history['valid_loss'].append(valid_loss) + history['valid_acc'].append(valid_acc) + + if valid_acc > best_valid_acc: + print(f"Validasi akurasi meningkat dari {best_valid_acc:.4f} ke {valid_acc:.4f}. 
Menyimpan model...") + save_model(epoch, model, optimizer_finetune, criterion, f"{OUTPUT_DIR}/{MODEL_NAME}") + best_valid_acc = valid_acc + best_labels = valid_labels + best_preds = valid_preds + + # E. Evaluasi & Reporting + save_plots(history['train_acc'], history['valid_acc'], history['train_loss'], history['valid_loss'], OUTPUT_DIR) + + if best_labels and best_preds: + save_confusion_matrix(best_labels, best_preds, classes, f"{OUTPUT_DIR}/confusion_matrix.png") + + print("\n--- Selesai ---") + print(f"Model terbaik disimpan di {OUTPUT_DIR}/{MODEL_NAME}") \ No newline at end of file diff --git a/.history/src/train_20250927100613.py b/.history/src/train_20250927100613.py new file mode 100644 index 0000000000000000000000000000000000000000..ea41d75581c5d82eb24b737b45a27b5a03b7f1b9 --- /dev/null +++ b/.history/src/train_20250927100613.py @@ -0,0 +1,172 @@ +# src/train.py (Versi Final dengan Balanced Sampler & Focal Loss) + +import torch +import torch.nn as nn +import torch.optim as optim +from tqdm import tqdm +import numpy as np +from sklearn.metrics import classification_report +from torch.utils.data import DataLoader, WeightedRandomSampler +from utils import save_model, save_plots, save_confusion_matrix, FocalLoss + +# Impor dari file lain dalam proyek +from model import create_model +from dataset import get_dataloaders +from utils import save_model, save_plots, save_confusion_matrix, FocalLoss # Pastikan FocalLoss diimpor + +# --- 1. KONFIGURASI & HYPERPARAMETERS --- +DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu' +DATA_DIR = 'data' +OUTPUT_DIR = 'outputs/models' +IMAGE_SIZE = 224 +BATCH_SIZE = 16 +EPOCHS = 50 +LEARNING_RATE_HEAD = 1e-3 +LEARNING_RATE_FINETUNE = 3e-5 +WEIGHT_DECAY = 0.01 +MODEL_NAME = 'best_vit_model_balanced.pth' +NUM_CLASSES = 4 + +# --- 2. 
FUNGSI TRAINING & VALIDASI (Tidak ada perubahan di sini) --- +def train_one_epoch(model, dataloader, optimizer, criterion, device): + model.train() + running_loss = 0.0 + correct_predictions = 0 + total_samples = 0 + + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Training"): + images, labels = images.to(device), labels.to(device) + optimizer.zero_grad() + outputs = model(images) + loss = criterion(outputs, labels) + loss.backward() + optimizer.step() + + running_loss += loss.item() * images.size(0) + _, preds = torch.max(outputs, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + return epoch_loss, epoch_acc + +def validate_one_epoch(model, dataloader, criterion, device): + model.eval() + running_loss = 0.0 + correct_predictions = 0 + total_samples = 0 + all_preds, all_labels = [], [] + + with torch.no_grad(): + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Validating"): + images, labels = images.to(device), labels.to(device) + outputs = model(images) + loss = criterion(outputs, labels) + + running_loss += loss.item() * images.size(0) + _, preds = torch.max(outputs, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + all_preds.extend(preds.cpu().numpy()) + all_labels.extend(labels.cpu().numpy()) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + + print("\n--- Laporan Klasifikasi Validasi ---") + print(classification_report(all_labels, all_preds, target_names=[str(i) for i in range(NUM_CLASSES)], zero_division=0)) + + return epoch_loss, epoch_acc, all_labels, all_preds + +# --- 3. SCRIPT UTAMA --- +if __name__ == '__main__': + # A. 
Persiapan Data & Implementasi WeightedRandomSampler + # Panggil get_dataloaders untuk mendapatkan dataset latih + _, valid_loader, classes, train_dataset = get_dataloaders(DATA_DIR, BATCH_SIZE, IMAGE_SIZE) + + print("\n--- Menyiapkan Balanced Sampler untuk Training ---") + # Hitung bobot untuk setiap sampel (bukan per kelas) + class_counts = np.bincount(train_dataset.targets) + class_weights = 1. / torch.tensor(class_counts, dtype=torch.float) + sample_weights = class_weights[train_dataset.targets] + criterion = FocalLoss() + # Buat Sampler yang akan menyeimbangkan pengambilan data + sampler = WeightedRandomSampler(weights=sample_weights, num_samples=len(sample_weights), replacement=True) + + # Buat DataLoader training BARU yang menggunakan sampler + train_loader = DataLoader( + train_dataset, + batch_size=BATCH_SIZE, + sampler=sampler, + num_workers=4 # Sesuaikan dengan kemampuan CPU Anda + ) + print("Balanced Sampler berhasil dibuat. Setiap batch training sekarang akan seimbang.") + + # B. Arsitektur & Pretraining + model = create_model(num_classes=NUM_CLASSES).to(DEVICE) + + # C. Menggunakan Focal Loss + # Ganti dari CrossEntropyLoss menjadi FocalLoss + criterion = FocalLoss().to(DEVICE) + print("Menggunakan Focal Loss untuk fokus pada sampel yang sulit.") + + # D. 
Strategi Fine-tune + # --- TAHAP 1: FREEZE BACKBONE, LATIH HEAD --- + print("\n--- TAHAP 1: Melatih Classifier Head ---") + for param in model.parameters(): + param.requires_grad = False + for param in model.head.parameters(): + param.requires_grad = True + + optimizer_head = optim.AdamW(model.head.parameters(), lr=LEARNING_RATE_HEAD, weight_decay=WEIGHT_DECAY) + + for epoch in range(5): + print(f"Epoch Head {epoch+1}/5") + train_one_epoch(model, train_loader, optimizer_head, criterion, DEVICE) + validate_one_epoch(model, valid_loader, criterion, DEVICE) + + # --- TAHAP 2: UNFREEZE & FINE-TUNE SELURUH MODEL --- + print("\n--- TAHAP 2: Fine-tuning Seluruh Model ---") + for param in model.parameters(): + param.requires_grad = True + + optimizer_finetune = optim.AdamW(model.parameters(), lr=LEARNING_RATE_FINETUNE, weight_decay=WEIGHT_DECAY) + scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer_finetune, T_max=EPOCHS, eta_min=1e-7) + + history = {'train_loss': [], 'train_acc': [], 'valid_loss': [], 'valid_acc': []} + best_valid_acc = 0.0 + best_labels, best_preds = None, None + + for epoch in range(EPOCHS): + print(f"Epoch {epoch+1}/{EPOCHS}") + + train_loss, train_acc = train_one_epoch(model, train_loader, optimizer_finetune, criterion, DEVICE) + valid_loss, valid_acc, valid_labels, valid_preds = validate_one_epoch(model, valid_loader, criterion, DEVICE) + + scheduler.step() + + print(f" Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}") + print(f" Valid Loss: {valid_loss:.4f}, Valid Acc: {valid_acc:.4f}") + + history['train_loss'].append(train_loss) + history['train_acc'].append(train_acc) + history['valid_loss'].append(valid_loss) + history['valid_acc'].append(valid_acc) + + if valid_acc > best_valid_acc: + print(f"Validasi akurasi meningkat dari {best_valid_acc:.4f} ke {valid_acc:.4f}. 
Menyimpan model...") + save_model(epoch, model, optimizer_finetune, criterion, f"{OUTPUT_DIR}/{MODEL_NAME}") + best_valid_acc = valid_acc + best_labels = valid_labels + best_preds = valid_preds + + # E. Evaluasi & Reporting + save_plots(history['train_acc'], history['valid_acc'], history['train_loss'], history['valid_loss'], OUTPUT_DIR) + + if best_labels and best_preds: + save_confusion_matrix(best_labels, best_preds, classes, f"{OUTPUT_DIR}/confusion_matrix.png") + + print("\n--- Selesai ---") + print(f"Model terbaik disimpan di {OUTPUT_DIR}/{MODEL_NAME}") \ No newline at end of file diff --git a/.history/src/train_20250927100750.py b/.history/src/train_20250927100750.py new file mode 100644 index 0000000000000000000000000000000000000000..e20f810ed88bbab3b970e85dbc76ed147fe8d871 --- /dev/null +++ b/.history/src/train_20250927100750.py @@ -0,0 +1,172 @@ +# src/train.py (Versi Final dengan Balanced Sampler & Focal Loss) + +import torch +import torch.nn as nn +import torch.optim as optim +from tqdm import tqdm +import numpy as np +from sklearn.metrics import classification_report +from torch.utils.data import DataLoader, WeightedRandomSampler +from utils import save_model, save_plots, save_confusion_matrix, FocalLoss + +# Impor dari file lain dalam proyek +from model import create_model +from dataset import get_dataloaders +from utils import save_model, save_plots, save_confusion_matrix, FocalLoss # Pastikan FocalLoss diimpor + +# --- 1. KONFIGURASI & HYPERPARAMETERS --- +DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu' +DATA_DIR = 'data' +OUTPUT_DIR = 'outputs/models' +IMAGE_SIZE = 224 +BATCH_SIZE = 16 +EPOCHS = 30 +LEARNING_RATE_HEAD = 1e-3 +LEARNING_RATE_FINETUNE = 3e-5 +WEIGHT_DECAY = 0.01 +MODEL_NAME = 'best_vit_model_balanced.pth' +NUM_CLASSES = 4 + +# --- 2. 
FUNGSI TRAINING & VALIDASI (Tidak ada perubahan di sini) --- +def train_one_epoch(model, dataloader, optimizer, criterion, device): + model.train() + running_loss = 0.0 + correct_predictions = 0 + total_samples = 0 + + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Training"): + images, labels = images.to(device), labels.to(device) + optimizer.zero_grad() + outputs = model(images) + loss = criterion(outputs, labels) + loss.backward() + optimizer.step() + + running_loss += loss.item() * images.size(0) + _, preds = torch.max(outputs, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + return epoch_loss, epoch_acc + +def validate_one_epoch(model, dataloader, criterion, device): + model.eval() + running_loss = 0.0 + correct_predictions = 0 + total_samples = 0 + all_preds, all_labels = [], [] + + with torch.no_grad(): + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Validating"): + images, labels = images.to(device), labels.to(device) + outputs = model(images) + loss = criterion(outputs, labels) + + running_loss += loss.item() * images.size(0) + _, preds = torch.max(outputs, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + all_preds.extend(preds.cpu().numpy()) + all_labels.extend(labels.cpu().numpy()) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + + print("\n--- Laporan Klasifikasi Validasi ---") + print(classification_report(all_labels, all_preds, target_names=[str(i) for i in range(NUM_CLASSES)], zero_division=0)) + + return epoch_loss, epoch_acc, all_labels, all_preds + +# --- 3. SCRIPT UTAMA --- +if __name__ == '__main__': + # A. 
Persiapan Data & Implementasi WeightedRandomSampler + # Panggil get_dataloaders untuk mendapatkan dataset latih + _, valid_loader, classes, train_dataset = get_dataloaders(DATA_DIR, BATCH_SIZE, IMAGE_SIZE) + + print("\n--- Menyiapkan Balanced Sampler untuk Training ---") + # Hitung bobot untuk setiap sampel (bukan per kelas) + class_counts = np.bincount(train_dataset.targets) + class_weights = 1. / torch.tensor(class_counts, dtype=torch.float) + sample_weights = class_weights[train_dataset.targets] + criterion = FocalLoss() + # Buat Sampler yang akan menyeimbangkan pengambilan data + sampler = WeightedRandomSampler(weights=sample_weights, num_samples=len(sample_weights), replacement=True) + + # Buat DataLoader training BARU yang menggunakan sampler + train_loader = DataLoader( + train_dataset, + batch_size=BATCH_SIZE, + sampler=sampler, + num_workers=4 # Sesuaikan dengan kemampuan CPU Anda + ) + print("Balanced Sampler berhasil dibuat. Setiap batch training sekarang akan seimbang.") + + # B. Arsitektur & Pretraining + model = create_model(num_classes=NUM_CLASSES).to(DEVICE) + + # C. Menggunakan Focal Loss + # Ganti dari CrossEntropyLoss menjadi FocalLoss + criterion = FocalLoss().to(DEVICE) + print("Menggunakan Focal Loss untuk fokus pada sampel yang sulit.") + + # D. 
Strategi Fine-tune + # --- TAHAP 1: FREEZE BACKBONE, LATIH HEAD --- + print("\n--- TAHAP 1: Melatih Classifier Head ---") + for param in model.parameters(): + param.requires_grad = False + for param in model.head.parameters(): + param.requires_grad = True + + optimizer_head = optim.AdamW(model.head.parameters(), lr=LEARNING_RATE_HEAD, weight_decay=WEIGHT_DECAY) + + for epoch in range(5): + print(f"Epoch Head {epoch+1}/5") + train_one_epoch(model, train_loader, optimizer_head, criterion, DEVICE) + validate_one_epoch(model, valid_loader, criterion, DEVICE) + + # --- TAHAP 2: UNFREEZE & FINE-TUNE SELURUH MODEL --- + print("\n--- TAHAP 2: Fine-tuning Seluruh Model ---") + for param in model.parameters(): + param.requires_grad = True + + optimizer_finetune = optim.AdamW(model.parameters(), lr=LEARNING_RATE_FINETUNE, weight_decay=WEIGHT_DECAY) + scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer_finetune, T_max=EPOCHS, eta_min=1e-7) + + history = {'train_loss': [], 'train_acc': [], 'valid_loss': [], 'valid_acc': []} + best_valid_acc = 0.0 + best_labels, best_preds = None, None + + for epoch in range(EPOCHS): + print(f"Epoch {epoch+1}/{EPOCHS}") + + train_loss, train_acc = train_one_epoch(model, train_loader, optimizer_finetune, criterion, DEVICE) + valid_loss, valid_acc, valid_labels, valid_preds = validate_one_epoch(model, valid_loader, criterion, DEVICE) + + scheduler.step() + + print(f" Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}") + print(f" Valid Loss: {valid_loss:.4f}, Valid Acc: {valid_acc:.4f}") + + history['train_loss'].append(train_loss) + history['train_acc'].append(train_acc) + history['valid_loss'].append(valid_loss) + history['valid_acc'].append(valid_acc) + + if valid_acc > best_valid_acc: + print(f"Validasi akurasi meningkat dari {best_valid_acc:.4f} ke {valid_acc:.4f}. 
Menyimpan model...") + save_model(epoch, model, optimizer_finetune, criterion, f"{OUTPUT_DIR}/{MODEL_NAME}") + best_valid_acc = valid_acc + best_labels = valid_labels + best_preds = valid_preds + + # E. Evaluasi & Reporting + save_plots(history['train_acc'], history['valid_acc'], history['train_loss'], history['valid_loss'], OUTPUT_DIR) + + if best_labels and best_preds: + save_confusion_matrix(best_labels, best_preds, classes, f"{OUTPUT_DIR}/confusion_matrix.png") + + print("\n--- Selesai ---") + print(f"Model terbaik disimpan di {OUTPUT_DIR}/{MODEL_NAME}") \ No newline at end of file diff --git a/.history/src/train_20250927101237.py b/.history/src/train_20250927101237.py new file mode 100644 index 0000000000000000000000000000000000000000..c42a9335013a7130a72063dae761c799960666e5 --- /dev/null +++ b/.history/src/train_20250927101237.py @@ -0,0 +1,172 @@ +# src/train.py (Versi Final dengan Balanced Sampler & Focal Loss) + +import torch +import torch.nn as nn +import torch.optim as optim +from tqdm import tqdm +import numpy as np +from sklearn.metrics import classification_report +from torch.utils.data import DataLoader, WeightedRandomSampler +from utils import save_model, save_plots, save_confusion_matrix, FocalLoss + +# Impor dari file lain dalam proyek +from model import create_model +from dataset import get_dataloaders +from utils import save_model, save_plots, save_confusion_matrix, FocalLoss # Pastikan FocalLoss diimpor + +# --- 1. KONFIGURASI & HYPERPARAMETERS --- +DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu' +DATA_DIR = 'data' +OUTPUT_DIR = 'outputs/models' +IMAGE_SIZE = 224 +BATCH_SIZE = 8 +EPOCHS = 30 +LEARNING_RATE_HEAD = 1e-3 +LEARNING_RATE_FINETUNE = 3e-5 +WEIGHT_DECAY = 0.01 +MODEL_NAME = 'best_vit_model_balanced.pth' +NUM_CLASSES = 4 + +# --- 2. 
FUNGSI TRAINING & VALIDASI (Tidak ada perubahan di sini) --- +def train_one_epoch(model, dataloader, optimizer, criterion, device): + model.train() + running_loss = 0.0 + correct_predictions = 0 + total_samples = 0 + + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Training"): + images, labels = images.to(device), labels.to(device) + optimizer.zero_grad() + outputs = model(images) + loss = criterion(outputs, labels) + loss.backward() + optimizer.step() + + running_loss += loss.item() * images.size(0) + _, preds = torch.max(outputs, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + return epoch_loss, epoch_acc + +def validate_one_epoch(model, dataloader, criterion, device): + model.eval() + running_loss = 0.0 + correct_predictions = 0 + total_samples = 0 + all_preds, all_labels = [], [] + + with torch.no_grad(): + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Validating"): + images, labels = images.to(device), labels.to(device) + outputs = model(images) + loss = criterion(outputs, labels) + + running_loss += loss.item() * images.size(0) + _, preds = torch.max(outputs, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + all_preds.extend(preds.cpu().numpy()) + all_labels.extend(labels.cpu().numpy()) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + + print("\n--- Laporan Klasifikasi Validasi ---") + print(classification_report(all_labels, all_preds, target_names=[str(i) for i in range(NUM_CLASSES)], zero_division=0)) + + return epoch_loss, epoch_acc, all_labels, all_preds + +# --- 3. SCRIPT UTAMA --- +if __name__ == '__main__': + # A. 
Persiapan Data & Implementasi WeightedRandomSampler + # Panggil get_dataloaders untuk mendapatkan dataset latih + _, valid_loader, classes, train_dataset = get_dataloaders(DATA_DIR, BATCH_SIZE, IMAGE_SIZE) + + print("\n--- Menyiapkan Balanced Sampler untuk Training ---") + # Hitung bobot untuk setiap sampel (bukan per kelas) + class_counts = np.bincount(train_dataset.targets) + class_weights = 1. / torch.tensor(class_counts, dtype=torch.float) + sample_weights = class_weights[train_dataset.targets] + criterion = FocalLoss() + # Buat Sampler yang akan menyeimbangkan pengambilan data + sampler = WeightedRandomSampler(weights=sample_weights, num_samples=len(sample_weights), replacement=True) + + # Buat DataLoader training BARU yang menggunakan sampler + train_loader = DataLoader( + train_dataset, + batch_size=BATCH_SIZE, + sampler=sampler, + num_workers=4 # Sesuaikan dengan kemampuan CPU Anda + ) + print("Balanced Sampler berhasil dibuat. Setiap batch training sekarang akan seimbang.") + + # B. Arsitektur & Pretraining + model = create_model(num_classes=NUM_CLASSES).to(DEVICE) + + # C. Menggunakan Focal Loss + # Ganti dari CrossEntropyLoss menjadi FocalLoss + criterion = FocalLoss().to(DEVICE) + print("Menggunakan Focal Loss untuk fokus pada sampel yang sulit.") + + # D. 
Strategi Fine-tune + # --- TAHAP 1: FREEZE BACKBONE, LATIH HEAD --- + print("\n--- TAHAP 1: Melatih Classifier Head ---") + for param in model.parameters(): + param.requires_grad = False + for param in model.head.parameters(): + param.requires_grad = True + + optimizer_head = optim.AdamW(model.head.parameters(), lr=LEARNING_RATE_HEAD, weight_decay=WEIGHT_DECAY) + + for epoch in range(5): + print(f"Epoch Head {epoch+1}/5") + train_one_epoch(model, train_loader, optimizer_head, criterion, DEVICE) + validate_one_epoch(model, valid_loader, criterion, DEVICE) + + # --- TAHAP 2: UNFREEZE & FINE-TUNE SELURUH MODEL --- + print("\n--- TAHAP 2: Fine-tuning Seluruh Model ---") + for param in model.parameters(): + param.requires_grad = True + + optimizer_finetune = optim.AdamW(model.parameters(), lr=LEARNING_RATE_FINETUNE, weight_decay=WEIGHT_DECAY) + scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer_finetune, T_max=EPOCHS, eta_min=1e-7) + + history = {'train_loss': [], 'train_acc': [], 'valid_loss': [], 'valid_acc': []} + best_valid_acc = 0.0 + best_labels, best_preds = None, None + + for epoch in range(EPOCHS): + print(f"Epoch {epoch+1}/{EPOCHS}") + + train_loss, train_acc = train_one_epoch(model, train_loader, optimizer_finetune, criterion, DEVICE) + valid_loss, valid_acc, valid_labels, valid_preds = validate_one_epoch(model, valid_loader, criterion, DEVICE) + + scheduler.step() + + print(f" Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}") + print(f" Valid Loss: {valid_loss:.4f}, Valid Acc: {valid_acc:.4f}") + + history['train_loss'].append(train_loss) + history['train_acc'].append(train_acc) + history['valid_loss'].append(valid_loss) + history['valid_acc'].append(valid_acc) + + if valid_acc > best_valid_acc: + print(f"Validasi akurasi meningkat dari {best_valid_acc:.4f} ke {valid_acc:.4f}. 
Menyimpan model...") + save_model(epoch, model, optimizer_finetune, criterion, f"{OUTPUT_DIR}/{MODEL_NAME}") + best_valid_acc = valid_acc + best_labels = valid_labels + best_preds = valid_preds + + # E. Evaluasi & Reporting + save_plots(history['train_acc'], history['valid_acc'], history['train_loss'], history['valid_loss'], OUTPUT_DIR) + + if best_labels and best_preds: + save_confusion_matrix(best_labels, best_preds, classes, f"{OUTPUT_DIR}/confusion_matrix.png") + + print("\n--- Selesai ---") + print(f"Model terbaik disimpan di {OUTPUT_DIR}/{MODEL_NAME}") \ No newline at end of file diff --git a/.history/src/train_20250927131956.py b/.history/src/train_20250927131956.py new file mode 100644 index 0000000000000000000000000000000000000000..a2b85a83a6ce6492c5519b22befb70a35a19cbe2 --- /dev/null +++ b/.history/src/train_20250927131956.py @@ -0,0 +1,172 @@ +# src/train.py (Versi Final dengan Balanced Sampler & Focal Loss) + +import torch +import torch.nn as nn +import torch.optim as optim +from tqdm import tqdm +import numpy as np +from sklearn.metrics import classification_report +from torch.utils.data import DataLoader, WeightedRandomSampler +from utils import save_model, save_plots, save_confusion_matrix, FocalLoss + +# Impor dari file lain dalam proyek +from model import create_model +from dataset import get_dataloaders +from utils import save_model, save_plots, save_confusion_matrix, FocalLoss # Pastikan FocalLoss diimpor + +# --- 1. KONFIGURASI & HYPERPARAMETERS --- +DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu' +DATA_DIR = 'data' +OUTPUT_DIR = 'outputs/models' +IMAGE_SIZE = 224 +BATCH_SIZE = 8 +EPOCHS = 50 +LEARNING_RATE_HEAD = 1e-3 +LEARNING_RATE_FINETUNE = 3e-5 +WEIGHT_DECAY = 0.01 +MODEL_NAME = 'best_vit_model_balanced.pth' +NUM_CLASSES = 4 + +# --- 2. 
FUNGSI TRAINING & VALIDASI (Tidak ada perubahan di sini) --- +def train_one_epoch(model, dataloader, optimizer, criterion, device): + model.train() + running_loss = 0.0 + correct_predictions = 0 + total_samples = 0 + + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Training"): + images, labels = images.to(device), labels.to(device) + optimizer.zero_grad() + outputs = model(images) + loss = criterion(outputs, labels) + loss.backward() + optimizer.step() + + running_loss += loss.item() * images.size(0) + _, preds = torch.max(outputs, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + return epoch_loss, epoch_acc + +def validate_one_epoch(model, dataloader, criterion, device): + model.eval() + running_loss = 0.0 + correct_predictions = 0 + total_samples = 0 + all_preds, all_labels = [], [] + + with torch.no_grad(): + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Validating"): + images, labels = images.to(device), labels.to(device) + outputs = model(images) + loss = criterion(outputs, labels) + + running_loss += loss.item() * images.size(0) + _, preds = torch.max(outputs, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + all_preds.extend(preds.cpu().numpy()) + all_labels.extend(labels.cpu().numpy()) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + + print("\n--- Laporan Klasifikasi Validasi ---") + print(classification_report(all_labels, all_preds, target_names=[str(i) for i in range(NUM_CLASSES)], zero_division=0)) + + return epoch_loss, epoch_acc, all_labels, all_preds + +# --- 3. SCRIPT UTAMA --- +if __name__ == '__main__': + # A. 
Persiapan Data & Implementasi WeightedRandomSampler + # Panggil get_dataloaders untuk mendapatkan dataset latih + _, valid_loader, classes, train_dataset = get_dataloaders(DATA_DIR, BATCH_SIZE, IMAGE_SIZE) + + print("\n--- Menyiapkan Balanced Sampler untuk Training ---") + # Hitung bobot untuk setiap sampel (bukan per kelas) + class_counts = np.bincount(train_dataset.targets) + class_weights = 1. / torch.tensor(class_counts, dtype=torch.float) + sample_weights = class_weights[train_dataset.targets] + criterion = FocalLoss() + # Buat Sampler yang akan menyeimbangkan pengambilan data + sampler = WeightedRandomSampler(weights=sample_weights, num_samples=len(sample_weights), replacement=True) + + # Buat DataLoader training BARU yang menggunakan sampler + train_loader = DataLoader( + train_dataset, + batch_size=BATCH_SIZE, + sampler=sampler, + num_workers=4 # Sesuaikan dengan kemampuan CPU Anda + ) + print("Balanced Sampler berhasil dibuat. Setiap batch training sekarang akan seimbang.") + + # B. Arsitektur & Pretraining + model = create_model(num_classes=NUM_CLASSES).to(DEVICE) + + # C. Menggunakan Focal Loss + # Ganti dari CrossEntropyLoss menjadi FocalLoss + criterion = FocalLoss().to(DEVICE) + print("Menggunakan Focal Loss untuk fokus pada sampel yang sulit.") + + # D. 
Strategi Fine-tune + # --- TAHAP 1: FREEZE BACKBONE, LATIH HEAD --- + print("\n--- TAHAP 1: Melatih Classifier Head ---") + for param in model.parameters(): + param.requires_grad = False + for param in model.head.parameters(): + param.requires_grad = True + + optimizer_head = optim.AdamW(model.head.parameters(), lr=LEARNING_RATE_HEAD, weight_decay=WEIGHT_DECAY) + + for epoch in range(5): + print(f"Epoch Head {epoch+1}/5") + train_one_epoch(model, train_loader, optimizer_head, criterion, DEVICE) + validate_one_epoch(model, valid_loader, criterion, DEVICE) + + # --- TAHAP 2: UNFREEZE & FINE-TUNE SELURUH MODEL --- + print("\n--- TAHAP 2: Fine-tuning Seluruh Model ---") + for param in model.parameters(): + param.requires_grad = True + + optimizer_finetune = optim.AdamW(model.parameters(), lr=LEARNING_RATE_FINETUNE, weight_decay=WEIGHT_DECAY) + scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer_finetune, T_max=EPOCHS, eta_min=1e-7) + + history = {'train_loss': [], 'train_acc': [], 'valid_loss': [], 'valid_acc': []} + best_valid_acc = 0.0 + best_labels, best_preds = None, None + + for epoch in range(EPOCHS): + print(f"Epoch {epoch+1}/{EPOCHS}") + + train_loss, train_acc = train_one_epoch(model, train_loader, optimizer_finetune, criterion, DEVICE) + valid_loss, valid_acc, valid_labels, valid_preds = validate_one_epoch(model, valid_loader, criterion, DEVICE) + + scheduler.step() + + print(f" Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}") + print(f" Valid Loss: {valid_loss:.4f}, Valid Acc: {valid_acc:.4f}") + + history['train_loss'].append(train_loss) + history['train_acc'].append(train_acc) + history['valid_loss'].append(valid_loss) + history['valid_acc'].append(valid_acc) + + if valid_acc > best_valid_acc: + print(f"Validasi akurasi meningkat dari {best_valid_acc:.4f} ke {valid_acc:.4f}. 
Menyimpan model...") + save_model(epoch, model, optimizer_finetune, criterion, f"{OUTPUT_DIR}/{MODEL_NAME}") + best_valid_acc = valid_acc + best_labels = valid_labels + best_preds = valid_preds + + # E. Evaluasi & Reporting + save_plots(history['train_acc'], history['valid_acc'], history['train_loss'], history['valid_loss'], OUTPUT_DIR) + + if best_labels and best_preds: + save_confusion_matrix(best_labels, best_preds, classes, f"{OUTPUT_DIR}/confusion_matrix.png") + + print("\n--- Selesai ---") + print(f"Model terbaik disimpan di {OUTPUT_DIR}/{MODEL_NAME}") \ No newline at end of file diff --git a/.history/src/train_20250927160422.py b/.history/src/train_20250927160422.py new file mode 100644 index 0000000000000000000000000000000000000000..6661fbb9352f88f8282bcf38137de55485f74dbe --- /dev/null +++ b/.history/src/train_20250927160422.py @@ -0,0 +1,173 @@ +# src/train.py (Versi Final dengan Balanced Sampler & Focal Loss) + +import torch +import torch.nn as nn +import torch.optim as optim +from tqdm import tqdm +import numpy as np +from sklearn.metrics import classification_report +from torch.utils.data import DataLoader, WeightedRandomSampler +from utils import save_model, save_plots, save_confusion_matrix, FocalLoss + +# Impor dari file lain dalam proyek +from model import create_model +from dataset import get_dataloaders +from utils import save_model, save_plots, save_confusion_matrix, FocalLoss # Pastikan FocalLoss diimpor + +# --- 1. KONFIGURASI & HYPERPARAMETERS --- +DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu' +DATA_DIR = 'data' +OUTPUT_DIR = 'outputs/models' +IMAGE_SIZE = 224 +BATCH_SIZE = 8 +EPOCHS = 50 +LEARNING_RATE_HEAD = 1e-3 +LEARNING_RATE_FINETUNE = 3e-5 +WEIGHT_DECAY = 0.01 +MODEL_NAME = 'best_vit_model_balanced.pth' +NUM_CLASSES = 4 + +# --- 2. 
FUNGSI TRAINING & VALIDASI (Tidak ada perubahan di sini) --- +def train_one_epoch(model, dataloader, optimizer, criterion, device): + model.train() + running_loss = 0.0 + correct_predictions = 0 + total_samples = 0 + + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Training"): + images, labels = images.to(device), labels.to(device) + optimizer.zero_grad() + outputs = model(images) + loss = criterion(outputs, labels) + loss.backward() + optimizer.step() + + running_loss += loss.item() * images.size(0) + _, preds = torch.max(outputs, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + return epoch_loss, epoch_acc + +def validate_one_epoch(model, dataloader, criterion, device): + model.eval() + running_loss = 0.0 + correct_predictions = 0 + total_samples = 0 + all_preds, all_labels = [], [] + + with torch.no_grad(): + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Validating"): + images, labels = images.to(device), labels.to(device) + outputs = model(images) + loss = criterion(outputs, labels) + + running_loss += loss.item() * images.size(0) + _, preds = torch.max(outputs, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + all_preds.extend(preds.cpu().numpy()) + all_labels.extend(labels.cpu().numpy()) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + + print("\n--- Laporan Klasifikasi Validasi ---") + print(classification_report(all_labels, all_preds, target_names=[str(i) for i in range(NUM_CLASSES)], zero_division=0)) + + return epoch_loss, epoch_acc, all_labels, all_preds + +# --- 3. SCRIPT UTAMA --- +if __name__ == '__main__': + # A. 
Persiapan Data & Implementasi WeightedRandomSampler + # Panggil get_dataloaders untuk mendapatkan dataset latih + _, valid_loader, classes, train_dataset = get_dataloaders(DATA_DIR, BATCH_SIZE, IMAGE_SIZE) + + print("\n--- Menyiapkan Balanced Sampler untuk Training ---") + # Hitung bobot untuk setiap sampel (bukan per kelas) + class_counts = np.bincount(train_dataset.targets) + class_weights = 1. / torch.tensor(class_counts, dtype=torch.float) + sample_weights = class_weights[train_dataset.targets] + criterion = FocalLoss() + # Buat Sampler yang akan menyeimbangkan pengambilan data + sampler = WeightedRandomSampler(weights=sample_weights, num_samples=len(sample_weights), replacement=True) + + # Buat DataLoader training BARU yang menggunakan sampler + train_loader = DataLoader( + train_dataset, + batch_size=BATCH_SIZE, + sampler=sampler, + num_workers=4 # Sesuaikan dengan kemampuan CPU Anda + ) + print("Balanced Sampler berhasil dibuat. Setiap batch training sekarang akan seimbang.") + + # B. Arsitektur & Pretraining + model = create_model(num_classes=NUM_CLASSES).to(DEVICE) + + # C. Menggunakan Focal Loss + # Ganti dari CrossEntropyLoss menjadi FocalLoss + criterion = FocalLoss(alpha=[1, 1, 1, 2], gamma=2).to(DEVICE) + + print("Menggunakan Focal Loss untuk fokus pada sampel yang sulit.") + + # D. 
Strategi Fine-tune + # --- TAHAP 1: FREEZE BACKBONE, LATIH HEAD --- + print("\n--- TAHAP 1: Melatih Classifier Head ---") + for param in model.parameters(): + param.requires_grad = False + for param in model.head.parameters(): + param.requires_grad = True + + optimizer_head = optim.AdamW(model.head.parameters(), lr=LEARNING_RATE_HEAD, weight_decay=WEIGHT_DECAY) + + for epoch in range(5): + print(f"Epoch Head {epoch+1}/5") + train_one_epoch(model, train_loader, optimizer_head, criterion, DEVICE) + validate_one_epoch(model, valid_loader, criterion, DEVICE) + + # --- TAHAP 2: UNFREEZE & FINE-TUNE SELURUH MODEL --- + print("\n--- TAHAP 2: Fine-tuning Seluruh Model ---") + for param in model.parameters(): + param.requires_grad = True + + optimizer_finetune = optim.AdamW(model.parameters(), lr=LEARNING_RATE_FINETUNE, weight_decay=WEIGHT_DECAY) + scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer_finetune, T_max=EPOCHS, eta_min=1e-7) + + history = {'train_loss': [], 'train_acc': [], 'valid_loss': [], 'valid_acc': []} + best_valid_acc = 0.0 + best_labels, best_preds = None, None + + for epoch in range(EPOCHS): + print(f"Epoch {epoch+1}/{EPOCHS}") + + train_loss, train_acc = train_one_epoch(model, train_loader, optimizer_finetune, criterion, DEVICE) + valid_loss, valid_acc, valid_labels, valid_preds = validate_one_epoch(model, valid_loader, criterion, DEVICE) + + scheduler.step() + + print(f" Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}") + print(f" Valid Loss: {valid_loss:.4f}, Valid Acc: {valid_acc:.4f}") + + history['train_loss'].append(train_loss) + history['train_acc'].append(train_acc) + history['valid_loss'].append(valid_loss) + history['valid_acc'].append(valid_acc) + + if valid_acc > best_valid_acc: + print(f"Validasi akurasi meningkat dari {best_valid_acc:.4f} ke {valid_acc:.4f}. 
Menyimpan model...") + save_model(epoch, model, optimizer_finetune, criterion, f"{OUTPUT_DIR}/{MODEL_NAME}") + best_valid_acc = valid_acc + best_labels = valid_labels + best_preds = valid_preds + + # E. Evaluasi & Reporting + save_plots(history['train_acc'], history['valid_acc'], history['train_loss'], history['valid_loss'], OUTPUT_DIR) + + if best_labels and best_preds: + save_confusion_matrix(best_labels, best_preds, classes, f"{OUTPUT_DIR}/confusion_matrix.png") + + print("\n--- Selesai ---") + print(f"Model terbaik disimpan di {OUTPUT_DIR}/{MODEL_NAME}") \ No newline at end of file diff --git a/.history/src/train_20250927160424.py b/.history/src/train_20250927160424.py new file mode 100644 index 0000000000000000000000000000000000000000..9794efaeaee199a987ca895addb1d46e1705ffb7 --- /dev/null +++ b/.history/src/train_20250927160424.py @@ -0,0 +1,172 @@ +# src/train.py (Versi Final dengan Balanced Sampler & Focal Loss) + +import torch +import torch.nn as nn +import torch.optim as optim +from tqdm import tqdm +import numpy as np +from sklearn.metrics import classification_report +from torch.utils.data import DataLoader, WeightedRandomSampler +from utils import save_model, save_plots, save_confusion_matrix, FocalLoss + +# Impor dari file lain dalam proyek +from model import create_model +from dataset import get_dataloaders +from utils import save_model, save_plots, save_confusion_matrix, FocalLoss # Pastikan FocalLoss diimpor + +# --- 1. KONFIGURASI & HYPERPARAMETERS --- +DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu' +DATA_DIR = 'data' +OUTPUT_DIR = 'outputs/models' +IMAGE_SIZE = 224 +BATCH_SIZE = 8 +EPOCHS = 50 +LEARNING_RATE_HEAD = 1e-3 +LEARNING_RATE_FINETUNE = 3e-5 +WEIGHT_DECAY = 0.01 +MODEL_NAME = 'best_vit_model_balanced.pth' +NUM_CLASSES = 4 + +# --- 2. 
FUNGSI TRAINING & VALIDASI (Tidak ada perubahan di sini) --- +def train_one_epoch(model, dataloader, optimizer, criterion, device): + model.train() + running_loss = 0.0 + correct_predictions = 0 + total_samples = 0 + + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Training"): + images, labels = images.to(device), labels.to(device) + optimizer.zero_grad() + outputs = model(images) + loss = criterion(outputs, labels) + loss.backward() + optimizer.step() + + running_loss += loss.item() * images.size(0) + _, preds = torch.max(outputs, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + return epoch_loss, epoch_acc + +def validate_one_epoch(model, dataloader, criterion, device): + model.eval() + running_loss = 0.0 + correct_predictions = 0 + total_samples = 0 + all_preds, all_labels = [], [] + + with torch.no_grad(): + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Validating"): + images, labels = images.to(device), labels.to(device) + outputs = model(images) + loss = criterion(outputs, labels) + + running_loss += loss.item() * images.size(0) + _, preds = torch.max(outputs, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + all_preds.extend(preds.cpu().numpy()) + all_labels.extend(labels.cpu().numpy()) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + + print("\n--- Laporan Klasifikasi Validasi ---") + print(classification_report(all_labels, all_preds, target_names=[str(i) for i in range(NUM_CLASSES)], zero_division=0)) + + return epoch_loss, epoch_acc, all_labels, all_preds + +# --- 3. SCRIPT UTAMA --- +if __name__ == '__main__': + # A. 
Persiapan Data & Implementasi WeightedRandomSampler + # Panggil get_dataloaders untuk mendapatkan dataset latih + _, valid_loader, classes, train_dataset = get_dataloaders(DATA_DIR, BATCH_SIZE, IMAGE_SIZE) + + print("\n--- Menyiapkan Balanced Sampler untuk Training ---") + # Hitung bobot untuk setiap sampel (bukan per kelas) + class_counts = np.bincount(train_dataset.targets) + class_weights = 1. / torch.tensor(class_counts, dtype=torch.float) + sample_weights = class_weights[train_dataset.targets] + criterion = FocalLoss() + # Buat Sampler yang akan menyeimbangkan pengambilan data + sampler = WeightedRandomSampler(weights=sample_weights, num_samples=len(sample_weights), replacement=True) + + # Buat DataLoader training BARU yang menggunakan sampler + train_loader = DataLoader( + train_dataset, + batch_size=BATCH_SIZE, + sampler=sampler, + num_workers=4 # Sesuaikan dengan kemampuan CPU Anda + ) + print("Balanced Sampler berhasil dibuat. Setiap batch training sekarang akan seimbang.") + + # B. Arsitektur & Pretraining + model = create_model(num_classes=NUM_CLASSES).to(DEVICE) + + # C. Menggunakan Focal Loss + # Ganti dari CrossEntropyLoss menjadi FocalLoss + criterion = FocalLoss(alpha=[1, 1, 1, 2], gamma=2).to(DEVICE) + print("Menggunakan Focal Loss untuk fokus pada sampel yang sulit.") + + # D. 
Strategi Fine-tune + # --- TAHAP 1: FREEZE BACKBONE, LATIH HEAD --- + print("\n--- TAHAP 1: Melatih Classifier Head ---") + for param in model.parameters(): + param.requires_grad = False + for param in model.head.parameters(): + param.requires_grad = True + + optimizer_head = optim.AdamW(model.head.parameters(), lr=LEARNING_RATE_HEAD, weight_decay=WEIGHT_DECAY) + + for epoch in range(5): + print(f"Epoch Head {epoch+1}/5") + train_one_epoch(model, train_loader, optimizer_head, criterion, DEVICE) + validate_one_epoch(model, valid_loader, criterion, DEVICE) + + # --- TAHAP 2: UNFREEZE & FINE-TUNE SELURUH MODEL --- + print("\n--- TAHAP 2: Fine-tuning Seluruh Model ---") + for param in model.parameters(): + param.requires_grad = True + + optimizer_finetune = optim.AdamW(model.parameters(), lr=LEARNING_RATE_FINETUNE, weight_decay=WEIGHT_DECAY) + scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer_finetune, T_max=EPOCHS, eta_min=1e-7) + + history = {'train_loss': [], 'train_acc': [], 'valid_loss': [], 'valid_acc': []} + best_valid_acc = 0.0 + best_labels, best_preds = None, None + + for epoch in range(EPOCHS): + print(f"Epoch {epoch+1}/{EPOCHS}") + + train_loss, train_acc = train_one_epoch(model, train_loader, optimizer_finetune, criterion, DEVICE) + valid_loss, valid_acc, valid_labels, valid_preds = validate_one_epoch(model, valid_loader, criterion, DEVICE) + + scheduler.step() + + print(f" Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}") + print(f" Valid Loss: {valid_loss:.4f}, Valid Acc: {valid_acc:.4f}") + + history['train_loss'].append(train_loss) + history['train_acc'].append(train_acc) + history['valid_loss'].append(valid_loss) + history['valid_acc'].append(valid_acc) + + if valid_acc > best_valid_acc: + print(f"Validasi akurasi meningkat dari {best_valid_acc:.4f} ke {valid_acc:.4f}. 
Menyimpan model...") + save_model(epoch, model, optimizer_finetune, criterion, f"{OUTPUT_DIR}/{MODEL_NAME}") + best_valid_acc = valid_acc + best_labels = valid_labels + best_preds = valid_preds + + # E. Evaluasi & Reporting + save_plots(history['train_acc'], history['valid_acc'], history['train_loss'], history['valid_loss'], OUTPUT_DIR) + + if best_labels and best_preds: + save_confusion_matrix(best_labels, best_preds, classes, f"{OUTPUT_DIR}/confusion_matrix.png") + + print("\n--- Selesai ---") + print(f"Model terbaik disimpan di {OUTPUT_DIR}/{MODEL_NAME}") \ No newline at end of file diff --git a/.history/src/train_20250927160610.py b/.history/src/train_20250927160610.py new file mode 100644 index 0000000000000000000000000000000000000000..1b827c513b27d596c295df339cd43d0cd9962ca6 --- /dev/null +++ b/.history/src/train_20250927160610.py @@ -0,0 +1,172 @@ +# src/train.py (Versi Final dengan Balanced Sampler & Focal Loss) + +import torch +import torch.nn as nn +import torch.optim as optim +from tqdm import tqdm +import numpy as np +from sklearn.metrics import classification_report +from torch.utils.data import DataLoader, WeightedRandomSampler +from utils import save_model, save_plots, save_confusion_matrix, FocalLoss + +# Impor dari file lain dalam proyek +from model import create_model +from dataset import get_dataloaders +from utils import save_model, save_plots, save_confusion_matrix, FocalLoss # Pastikan FocalLoss diimpor + +# --- 1. KONFIGURASI & HYPERPARAMETERS --- +DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu' +DATA_DIR = 'data' +OUTPUT_DIR = 'outputs/models' +IMAGE_SIZE = 224 +BATCH_SIZE = 8 +EPOCHS = 30 +LEARNING_RATE_HEAD = 1e-3 +LEARNING_RATE_FINETUNE = 3e-5 +WEIGHT_DECAY = 0.01 +MODEL_NAME = 'best_vit_model_balanced.pth' +NUM_CLASSES = 4 + +# --- 2. 
FUNGSI TRAINING & VALIDASI (Tidak ada perubahan di sini) --- +def train_one_epoch(model, dataloader, optimizer, criterion, device): + model.train() + running_loss = 0.0 + correct_predictions = 0 + total_samples = 0 + + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Training"): + images, labels = images.to(device), labels.to(device) + optimizer.zero_grad() + outputs = model(images) + loss = criterion(outputs, labels) + loss.backward() + optimizer.step() + + running_loss += loss.item() * images.size(0) + _, preds = torch.max(outputs, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + return epoch_loss, epoch_acc + +def validate_one_epoch(model, dataloader, criterion, device): + model.eval() + running_loss = 0.0 + correct_predictions = 0 + total_samples = 0 + all_preds, all_labels = [], [] + + with torch.no_grad(): + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Validating"): + images, labels = images.to(device), labels.to(device) + outputs = model(images) + loss = criterion(outputs, labels) + + running_loss += loss.item() * images.size(0) + _, preds = torch.max(outputs, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + all_preds.extend(preds.cpu().numpy()) + all_labels.extend(labels.cpu().numpy()) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + + print("\n--- Laporan Klasifikasi Validasi ---") + print(classification_report(all_labels, all_preds, target_names=[str(i) for i in range(NUM_CLASSES)], zero_division=0)) + + return epoch_loss, epoch_acc, all_labels, all_preds + +# --- 3. SCRIPT UTAMA --- +if __name__ == '__main__': + # A. 
Persiapan Data & Implementasi WeightedRandomSampler + # Panggil get_dataloaders untuk mendapatkan dataset latih + _, valid_loader, classes, train_dataset = get_dataloaders(DATA_DIR, BATCH_SIZE, IMAGE_SIZE) + + print("\n--- Menyiapkan Balanced Sampler untuk Training ---") + # Hitung bobot untuk setiap sampel (bukan per kelas) + class_counts = np.bincount(train_dataset.targets) + class_weights = 1. / torch.tensor(class_counts, dtype=torch.float) + sample_weights = class_weights[train_dataset.targets] + criterion = FocalLoss() + # Buat Sampler yang akan menyeimbangkan pengambilan data + sampler = WeightedRandomSampler(weights=sample_weights, num_samples=len(sample_weights), replacement=True) + + # Buat DataLoader training BARU yang menggunakan sampler + train_loader = DataLoader( + train_dataset, + batch_size=BATCH_SIZE, + sampler=sampler, + num_workers=4 # Sesuaikan dengan kemampuan CPU Anda + ) + print("Balanced Sampler berhasil dibuat. Setiap batch training sekarang akan seimbang.") + + # B. Arsitektur & Pretraining + model = create_model(num_classes=NUM_CLASSES).to(DEVICE) + + # C. Menggunakan Focal Loss + # Ganti dari CrossEntropyLoss menjadi FocalLoss + criterion = FocalLoss(alpha=[1, 1, 1, 2], gamma=2).to(DEVICE) + print("Menggunakan Focal Loss untuk fokus pada sampel yang sulit.") + + # D. 
Strategi Fine-tune + # --- TAHAP 1: FREEZE BACKBONE, LATIH HEAD --- + print("\n--- TAHAP 1: Melatih Classifier Head ---") + for param in model.parameters(): + param.requires_grad = False + for param in model.head.parameters(): + param.requires_grad = True + + optimizer_head = optim.AdamW(model.head.parameters(), lr=LEARNING_RATE_HEAD, weight_decay=WEIGHT_DECAY) + + for epoch in range(5): + print(f"Epoch Head {epoch+1}/5") + train_one_epoch(model, train_loader, optimizer_head, criterion, DEVICE) + validate_one_epoch(model, valid_loader, criterion, DEVICE) + + # --- TAHAP 2: UNFREEZE & FINE-TUNE SELURUH MODEL --- + print("\n--- TAHAP 2: Fine-tuning Seluruh Model ---") + for param in model.parameters(): + param.requires_grad = True + + optimizer_finetune = optim.AdamW(model.parameters(), lr=LEARNING_RATE_FINETUNE, weight_decay=WEIGHT_DECAY) + scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer_finetune, T_max=EPOCHS, eta_min=1e-7) + + history = {'train_loss': [], 'train_acc': [], 'valid_loss': [], 'valid_acc': []} + best_valid_acc = 0.0 + best_labels, best_preds = None, None + + for epoch in range(EPOCHS): + print(f"Epoch {epoch+1}/{EPOCHS}") + + train_loss, train_acc = train_one_epoch(model, train_loader, optimizer_finetune, criterion, DEVICE) + valid_loss, valid_acc, valid_labels, valid_preds = validate_one_epoch(model, valid_loader, criterion, DEVICE) + + scheduler.step() + + print(f" Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}") + print(f" Valid Loss: {valid_loss:.4f}, Valid Acc: {valid_acc:.4f}") + + history['train_loss'].append(train_loss) + history['train_acc'].append(train_acc) + history['valid_loss'].append(valid_loss) + history['valid_acc'].append(valid_acc) + + if valid_acc > best_valid_acc: + print(f"Validasi akurasi meningkat dari {best_valid_acc:.4f} ke {valid_acc:.4f}. 
Menyimpan model...") + save_model(epoch, model, optimizer_finetune, criterion, f"{OUTPUT_DIR}/{MODEL_NAME}") + best_valid_acc = valid_acc + best_labels = valid_labels + best_preds = valid_preds + + # E. Evaluasi & Reporting + save_plots(history['train_acc'], history['valid_acc'], history['train_loss'], history['valid_loss'], OUTPUT_DIR) + + if best_labels and best_preds: + save_confusion_matrix(best_labels, best_preds, classes, f"{OUTPUT_DIR}/confusion_matrix.png") + + print("\n--- Selesai ---") + print(f"Model terbaik disimpan di {OUTPUT_DIR}/{MODEL_NAME}") \ No newline at end of file diff --git a/.history/src/train_20250927160816.py b/.history/src/train_20250927160816.py new file mode 100644 index 0000000000000000000000000000000000000000..ba28cc28bc02bf95681293530ce5766e36cabc39 --- /dev/null +++ b/.history/src/train_20250927160816.py @@ -0,0 +1,172 @@ +# src/train.py (Versi Final dengan Balanced Sampler & Focal Loss) + +import torch +import torch.nn as nn +import torch.optim as optim +from tqdm import tqdm +import numpy as np +from sklearn.metrics import classification_report +from torch.utils.data import DataLoader, WeightedRandomSampler +from utils import save_model, save_plots, save_confusion_matrix, FocalLoss + +# Impor dari file lain dalam proyek +from model import create_model +from dataset import get_dataloaders +from utils import save_model, save_plots, save_confusion_matrix, FocalLoss # Pastikan FocalLoss diimpor + +# --- 1. KONFIGURASI & HYPERPARAMETERS --- +DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu' +DATA_DIR = 'data' +OUTPUT_DIR = 'outputs/models' +IMAGE_SIZE = 224 +BATCH_SIZE = 8 +EPOCHS = 30 +LEARNING_RATE_HEAD = 1e-3 +LEARNING_RATE_FINETUNE = 3e-5 +WEIGHT_DECAY = 0.01 +MODEL_NAME = 'best_vit_model_balanced.pth' +NUM_CLASSES = 4 + +# --- 2. 
FUNGSI TRAINING & VALIDASI (Tidak ada perubahan di sini) --- +def train_one_epoch(model, dataloader, optimizer, criterion, device): + model.train() + running_loss = 0.0 + correct_predictions = 0 + total_samples = 0 + + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Training"): + images, labels = images.to(device), labels.to(device) + optimizer.zero_grad() + outputs = model(images) + loss = criterion(outputs, labels) + loss.backward() + optimizer.step() + + running_loss += loss.item() * images.size(0) + _, preds = torch.max(outputs, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + return epoch_loss, epoch_acc + +def validate_one_epoch(model, dataloader, criterion, device): + model.eval() + running_loss = 0.0 + correct_predictions = 0 + total_samples = 0 + all_preds, all_labels = [], [] + + with torch.no_grad(): + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Validating"): + images, labels = images.to(device), labels.to(device) + outputs = model(images) + loss = criterion(outputs, labels) + + running_loss += loss.item() * images.size(0) + _, preds = torch.max(outputs, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + all_preds.extend(preds.cpu().numpy()) + all_labels.extend(labels.cpu().numpy()) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + + print("\n--- Laporan Klasifikasi Validasi ---") + print(classification_report(all_labels, all_preds, target_names=[str(i) for i in range(NUM_CLASSES)], zero_division=0)) + + return epoch_loss, epoch_acc, all_labels, all_preds + +# --- 3. SCRIPT UTAMA --- +if __name__ == '__main__': + # A. 
Persiapan Data & Implementasi WeightedRandomSampler + # Panggil get_dataloaders untuk mendapatkan dataset latih + _, valid_loader, classes, train_dataset = get_dataloaders(DATA_DIR, BATCH_SIZE, IMAGE_SIZE) + + print("\n--- Menyiapkan Balanced Sampler untuk Training ---") + # Hitung bobot untuk setiap sampel (bukan per kelas) + class_counts = np.bincount(train_dataset.targets) + class_weights = 1. / torch.tensor(class_counts, dtype=torch.float) + sample_weights = class_weights[train_dataset.targets] + criterion = FocalLoss() + # Buat Sampler yang akan menyeimbangkan pengambilan data + sampler = WeightedRandomSampler(weights=sample_weights, num_samples=len(sample_weights), replacement=True) + + # Buat DataLoader training BARU yang menggunakan sampler + train_loader = DataLoader( + train_dataset, + batch_size=BATCH_SIZE, + sampler=sampler, + num_workers=4 # Sesuaikan dengan kemampuan CPU Anda + ) + print("Balanced Sampler berhasil dibuat. Setiap batch training sekarang akan seimbang.") + + # B. Arsitektur & Pretraining + model = create_model(num_classes=len(dataset_classes)).to(DEVICE) + + # C. Menggunakan Focal Loss + # Ganti dari CrossEntropyLoss menjadi FocalLoss + criterion = FocalLoss(alpha=[1, 1, 1, 2], gamma=2).to(DEVICE) + print("Menggunakan Focal Loss untuk fokus pada sampel yang sulit.") + + # D. 
Strategi Fine-tune + # --- TAHAP 1: FREEZE BACKBONE, LATIH HEAD --- + print("\n--- TAHAP 1: Melatih Classifier Head ---") + for param in model.parameters(): + param.requires_grad = False + for param in model.head.parameters(): + param.requires_grad = True + + optimizer_head = optim.AdamW(model.head.parameters(), lr=LEARNING_RATE_HEAD, weight_decay=WEIGHT_DECAY) + + for epoch in range(5): + print(f"Epoch Head {epoch+1}/5") + train_one_epoch(model, train_loader, optimizer_head, criterion, DEVICE) + validate_one_epoch(model, valid_loader, criterion, DEVICE) + + # --- TAHAP 2: UNFREEZE & FINE-TUNE SELURUH MODEL --- + print("\n--- TAHAP 2: Fine-tuning Seluruh Model ---") + for param in model.parameters(): + param.requires_grad = True + + optimizer_finetune = optim.AdamW(model.parameters(), lr=LEARNING_RATE_FINETUNE, weight_decay=WEIGHT_DECAY) + scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer_finetune, T_max=EPOCHS, eta_min=1e-7) + + history = {'train_loss': [], 'train_acc': [], 'valid_loss': [], 'valid_acc': []} + best_valid_acc = 0.0 + best_labels, best_preds = None, None + + for epoch in range(EPOCHS): + print(f"Epoch {epoch+1}/{EPOCHS}") + + train_loss, train_acc = train_one_epoch(model, train_loader, optimizer_finetune, criterion, DEVICE) + valid_loss, valid_acc, valid_labels, valid_preds = validate_one_epoch(model, valid_loader, criterion, DEVICE) + + scheduler.step() + + print(f" Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}") + print(f" Valid Loss: {valid_loss:.4f}, Valid Acc: {valid_acc:.4f}") + + history['train_loss'].append(train_loss) + history['train_acc'].append(train_acc) + history['valid_loss'].append(valid_loss) + history['valid_acc'].append(valid_acc) + + if valid_acc > best_valid_acc: + print(f"Validasi akurasi meningkat dari {best_valid_acc:.4f} ke {valid_acc:.4f}. 
Menyimpan model...") + save_model(epoch, model, optimizer_finetune, criterion, f"{OUTPUT_DIR}/{MODEL_NAME}") + best_valid_acc = valid_acc + best_labels = valid_labels + best_preds = valid_preds + + # E. Evaluasi & Reporting + save_plots(history['train_acc'], history['valid_acc'], history['train_loss'], history['valid_loss'], OUTPUT_DIR) + + if best_labels and best_preds: + save_confusion_matrix(best_labels, best_preds, classes, f"{OUTPUT_DIR}/confusion_matrix.png") + + print("\n--- Selesai ---") + print(f"Model terbaik disimpan di {OUTPUT_DIR}/{MODEL_NAME}") \ No newline at end of file diff --git a/.history/src/train_20250927160834.py b/.history/src/train_20250927160834.py new file mode 100644 index 0000000000000000000000000000000000000000..bd682ca97a482bf48db811e832db8430a783d580 --- /dev/null +++ b/.history/src/train_20250927160834.py @@ -0,0 +1,172 @@ +# src/train.py (Versi Final dengan Balanced Sampler & Focal Loss) + +import torch +import torch.nn as nn +import torch.optim as optim +from tqdm import tqdm +import numpy as np +from sklearn.metrics import classification_report +from torch.utils.data import DataLoader, WeightedRandomSampler +from utils import save_model, save_plots, save_confusion_matrix, FocalLoss + +# Impor dari file lain dalam proyek +from model import create_model +from dataset import get_dataloaders +from utils import save_model, save_plots, save_confusion_matrix, FocalLoss # Pastikan FocalLoss diimpor + +# --- 1. KONFIGURASI & HYPERPARAMETERS --- +DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu' +DATA_DIR = 'data' +OUTPUT_DIR = 'outputs/models' +IMAGE_SIZE = 224 +BATCH_SIZE = 8 +EPOCHS = 30 +LEARNING_RATE_HEAD = 1e-3 +LEARNING_RATE_FINETUNE = 3e-5 +WEIGHT_DECAY = 0.01 +MODEL_NAME = 'best_vit_model_balanced.pth' +NUM_CLASSES = 4 + +# --- 2. 
FUNGSI TRAINING & VALIDASI (Tidak ada perubahan di sini) --- +def train_one_epoch(model, dataloader, optimizer, criterion, device): + model.train() + running_loss = 0.0 + correct_predictions = 0 + total_samples = 0 + + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Training"): + images, labels = images.to(device), labels.to(device) + optimizer.zero_grad() + outputs = model(images) + loss = criterion(outputs, labels) + loss.backward() + optimizer.step() + + running_loss += loss.item() * images.size(0) + _, preds = torch.max(outputs, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + return epoch_loss, epoch_acc + +def validate_one_epoch(model, dataloader, criterion, device): + model.eval() + running_loss = 0.0 + correct_predictions = 0 + total_samples = 0 + all_preds, all_labels = [], [] + + with torch.no_grad(): + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Validating"): + images, labels = images.to(device), labels.to(device) + outputs = model(images) + loss = criterion(outputs, labels) + + running_loss += loss.item() * images.size(0) + _, preds = torch.max(outputs, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + all_preds.extend(preds.cpu().numpy()) + all_labels.extend(labels.cpu().numpy()) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + + print("\n--- Laporan Klasifikasi Validasi ---") + print(classification_report(all_labels, all_preds, target_names=[str(i) for i in range(NUM_CLASSES)], zero_division=0)) + + return epoch_loss, epoch_acc, all_labels, all_preds + +# --- 3. SCRIPT UTAMA --- +if __name__ == '__main__': + # A. 
Persiapan Data & Implementasi WeightedRandomSampler + # Panggil get_dataloaders untuk mendapatkan dataset latih + _, valid_loader, classes, train_dataset = get_dataloaders(DATA_DIR, BATCH_SIZE, IMAGE_SIZE) + + print("\n--- Menyiapkan Balanced Sampler untuk Training ---") + # Hitung bobot untuk setiap sampel (bukan per kelas) + class_counts = np.bincount(train_dataset.targets) + class_weights = 1. / torch.tensor(class_counts, dtype=torch.float) + sample_weights = class_weights[train_dataset.targets] + criterion = FocalLoss() + # Buat Sampler yang akan menyeimbangkan pengambilan data + sampler = WeightedRandomSampler(weights=sample_weights, num_samples=len(sample_weights), replacement=True) + + # Buat DataLoader training BARU yang menggunakan sampler + train_loader = DataLoader( + train_dataset, + batch_size=BATCH_SIZE, + sampler=sampler, + num_workers=4 # Sesuaikan dengan kemampuan CPU Anda + ) + print("Balanced Sampler berhasil dibuat. Setiap batch training sekarang akan seimbang.") + + # B. Arsitektur & Pretraining + v + + # C. Menggunakan Focal Loss + # Ganti dari CrossEntropyLoss menjadi FocalLoss + criterion = FocalLoss(alpha=[1, 1, 1, 2], gamma=2).to(DEVICE) + print("Menggunakan Focal Loss untuk fokus pada sampel yang sulit.") + + # D. 
Strategi Fine-tune + # --- TAHAP 1: FREEZE BACKBONE, LATIH HEAD --- + print("\n--- TAHAP 1: Melatih Classifier Head ---") + for param in model.parameters(): + param.requires_grad = False + for param in model.head.parameters(): + param.requires_grad = True + + optimizer_head = optim.AdamW(model.head.parameters(), lr=LEARNING_RATE_HEAD, weight_decay=WEIGHT_DECAY) + + for epoch in range(5): + print(f"Epoch Head {epoch+1}/5") + train_one_epoch(model, train_loader, optimizer_head, criterion, DEVICE) + validate_one_epoch(model, valid_loader, criterion, DEVICE) + + # --- TAHAP 2: UNFREEZE & FINE-TUNE SELURUH MODEL --- + print("\n--- TAHAP 2: Fine-tuning Seluruh Model ---") + for param in model.parameters(): + param.requires_grad = True + + optimizer_finetune = optim.AdamW(model.parameters(), lr=LEARNING_RATE_FINETUNE, weight_decay=WEIGHT_DECAY) + scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer_finetune, T_max=EPOCHS, eta_min=1e-7) + + history = {'train_loss': [], 'train_acc': [], 'valid_loss': [], 'valid_acc': []} + best_valid_acc = 0.0 + best_labels, best_preds = None, None + + for epoch in range(EPOCHS): + print(f"Epoch {epoch+1}/{EPOCHS}") + + train_loss, train_acc = train_one_epoch(model, train_loader, optimizer_finetune, criterion, DEVICE) + valid_loss, valid_acc, valid_labels, valid_preds = validate_one_epoch(model, valid_loader, criterion, DEVICE) + + scheduler.step() + + print(f" Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}") + print(f" Valid Loss: {valid_loss:.4f}, Valid Acc: {valid_acc:.4f}") + + history['train_loss'].append(train_loss) + history['train_acc'].append(train_acc) + history['valid_loss'].append(valid_loss) + history['valid_acc'].append(valid_acc) + + if valid_acc > best_valid_acc: + print(f"Validasi akurasi meningkat dari {best_valid_acc:.4f} ke {valid_acc:.4f}. 
Menyimpan model...") + save_model(epoch, model, optimizer_finetune, criterion, f"{OUTPUT_DIR}/{MODEL_NAME}") + best_valid_acc = valid_acc + best_labels = valid_labels + best_preds = valid_preds + + # E. Evaluasi & Reporting + save_plots(history['train_acc'], history['valid_acc'], history['train_loss'], history['valid_loss'], OUTPUT_DIR) + + if best_labels and best_preds: + save_confusion_matrix(best_labels, best_preds, classes, f"{OUTPUT_DIR}/confusion_matrix.png") + + print("\n--- Selesai ---") + print(f"Model terbaik disimpan di {OUTPUT_DIR}/{MODEL_NAME}") \ No newline at end of file diff --git a/.history/src/train_20250927160836.py b/.history/src/train_20250927160836.py new file mode 100644 index 0000000000000000000000000000000000000000..4bd376642543474016e355b3a66b3bc70480962a --- /dev/null +++ b/.history/src/train_20250927160836.py @@ -0,0 +1,172 @@ +# src/train.py (Versi Final dengan Balanced Sampler & Focal Loss) + +import torch +import torch.nn as nn +import torch.optim as optim +from tqdm import tqdm +import numpy as np +from sklearn.metrics import classification_report +from torch.utils.data import DataLoader, WeightedRandomSampler +from utils import save_model, save_plots, save_confusion_matrix, FocalLoss + +# Impor dari file lain dalam proyek +from model import create_model +from dataset import get_dataloaders +from utils import save_model, save_plots, save_confusion_matrix, FocalLoss # Pastikan FocalLoss diimpor + +# --- 1. KONFIGURASI & HYPERPARAMETERS --- +DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu' +DATA_DIR = 'data' +OUTPUT_DIR = 'outputs/models' +IMAGE_SIZE = 224 +BATCH_SIZE = 8 +EPOCHS = 30 +LEARNING_RATE_HEAD = 1e-3 +LEARNING_RATE_FINETUNE = 3e-5 +WEIGHT_DECAY = 0.01 +MODEL_NAME = 'best_vit_model_balanced.pth' +NUM_CLASSES = 4 + +# --- 2. 
FUNGSI TRAINING & VALIDASI (Tidak ada perubahan di sini) --- +def train_one_epoch(model, dataloader, optimizer, criterion, device): + model.train() + running_loss = 0.0 + correct_predictions = 0 + total_samples = 0 + + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Training"): + images, labels = images.to(device), labels.to(device) + optimizer.zero_grad() + outputs = model(images) + loss = criterion(outputs, labels) + loss.backward() + optimizer.step() + + running_loss += loss.item() * images.size(0) + _, preds = torch.max(outputs, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + return epoch_loss, epoch_acc + +def validate_one_epoch(model, dataloader, criterion, device): + model.eval() + running_loss = 0.0 + correct_predictions = 0 + total_samples = 0 + all_preds, all_labels = [], [] + + with torch.no_grad(): + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Validating"): + images, labels = images.to(device), labels.to(device) + outputs = model(images) + loss = criterion(outputs, labels) + + running_loss += loss.item() * images.size(0) + _, preds = torch.max(outputs, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + all_preds.extend(preds.cpu().numpy()) + all_labels.extend(labels.cpu().numpy()) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + + print("\n--- Laporan Klasifikasi Validasi ---") + print(classification_report(all_labels, all_preds, target_names=[str(i) for i in range(NUM_CLASSES)], zero_division=0)) + + return epoch_loss, epoch_acc, all_labels, all_preds + +# --- 3. SCRIPT UTAMA --- +if __name__ == '__main__': + # A. 
Persiapan Data & Implementasi WeightedRandomSampler + # Panggil get_dataloaders untuk mendapatkan dataset latih + _, valid_loader, classes, train_dataset = get_dataloaders(DATA_DIR, BATCH_SIZE, IMAGE_SIZE) + + print("\n--- Menyiapkan Balanced Sampler untuk Training ---") + # Hitung bobot untuk setiap sampel (bukan per kelas) + class_counts = np.bincount(train_dataset.targets) + class_weights = 1. / torch.tensor(class_counts, dtype=torch.float) + sample_weights = class_weights[train_dataset.targets] + criterion = FocalLoss() + # Buat Sampler yang akan menyeimbangkan pengambilan data + sampler = WeightedRandomSampler(weights=sample_weights, num_samples=len(sample_weights), replacement=True) + + # Buat DataLoader training BARU yang menggunakan sampler + train_loader = DataLoader( + train_dataset, + batch_size=BATCH_SIZE, + sampler=sampler, + num_workers=4 # Sesuaikan dengan kemampuan CPU Anda + ) + print("Balanced Sampler berhasil dibuat. Setiap batch training sekarang akan seimbang.") + + # B. Arsitektur & Pretraining + model = RetinaClassifier(num_classes=len(dataset_classes)).to(DEVICE) + + # C. Menggunakan Focal Loss + # Ganti dari CrossEntropyLoss menjadi FocalLoss + criterion = FocalLoss(alpha=[1, 1, 1, 2], gamma=2).to(DEVICE) + print("Menggunakan Focal Loss untuk fokus pada sampel yang sulit.") + + # D. 
Strategi Fine-tune + # --- TAHAP 1: FREEZE BACKBONE, LATIH HEAD --- + print("\n--- TAHAP 1: Melatih Classifier Head ---") + for param in model.parameters(): + param.requires_grad = False + for param in model.head.parameters(): + param.requires_grad = True + + optimizer_head = optim.AdamW(model.head.parameters(), lr=LEARNING_RATE_HEAD, weight_decay=WEIGHT_DECAY) + + for epoch in range(5): + print(f"Epoch Head {epoch+1}/5") + train_one_epoch(model, train_loader, optimizer_head, criterion, DEVICE) + validate_one_epoch(model, valid_loader, criterion, DEVICE) + + # --- TAHAP 2: UNFREEZE & FINE-TUNE SELURUH MODEL --- + print("\n--- TAHAP 2: Fine-tuning Seluruh Model ---") + for param in model.parameters(): + param.requires_grad = True + + optimizer_finetune = optim.AdamW(model.parameters(), lr=LEARNING_RATE_FINETUNE, weight_decay=WEIGHT_DECAY) + scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer_finetune, T_max=EPOCHS, eta_min=1e-7) + + history = {'train_loss': [], 'train_acc': [], 'valid_loss': [], 'valid_acc': []} + best_valid_acc = 0.0 + best_labels, best_preds = None, None + + for epoch in range(EPOCHS): + print(f"Epoch {epoch+1}/{EPOCHS}") + + train_loss, train_acc = train_one_epoch(model, train_loader, optimizer_finetune, criterion, DEVICE) + valid_loss, valid_acc, valid_labels, valid_preds = validate_one_epoch(model, valid_loader, criterion, DEVICE) + + scheduler.step() + + print(f" Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}") + print(f" Valid Loss: {valid_loss:.4f}, Valid Acc: {valid_acc:.4f}") + + history['train_loss'].append(train_loss) + history['train_acc'].append(train_acc) + history['valid_loss'].append(valid_loss) + history['valid_acc'].append(valid_acc) + + if valid_acc > best_valid_acc: + print(f"Validasi akurasi meningkat dari {best_valid_acc:.4f} ke {valid_acc:.4f}. 
Menyimpan model...") + save_model(epoch, model, optimizer_finetune, criterion, f"{OUTPUT_DIR}/{MODEL_NAME}") + best_valid_acc = valid_acc + best_labels = valid_labels + best_preds = valid_preds + + # E. Evaluasi & Reporting + save_plots(history['train_acc'], history['valid_acc'], history['train_loss'], history['valid_loss'], OUTPUT_DIR) + + if best_labels and best_preds: + save_confusion_matrix(best_labels, best_preds, classes, f"{OUTPUT_DIR}/confusion_matrix.png") + + print("\n--- Selesai ---") + print(f"Model terbaik disimpan di {OUTPUT_DIR}/{MODEL_NAME}") \ No newline at end of file diff --git a/.history/src/train_20250927160848.py b/.history/src/train_20250927160848.py new file mode 100644 index 0000000000000000000000000000000000000000..2f5e7a06585a8bd4b6456b2e7615243203d49913 --- /dev/null +++ b/.history/src/train_20250927160848.py @@ -0,0 +1,173 @@ +# src/train.py (Versi Final dengan Balanced Sampler & Focal Loss) + +import torch +import torch.nn as nn +import torch.optim as optim +from tqdm import tqdm +import numpy as np +from sklearn.metrics import classification_report +from torch.utils.data import DataLoader, WeightedRandomSampler +from utils import save_model, save_plots, save_confusion_matrix, FocalLoss + +# Impor dari file lain dalam proyek +from model import RetinaClassifier +from model import create_model +from dataset import get_dataloaders +from utils import save_model, save_plots, save_confusion_matrix, FocalLoss # Pastikan FocalLoss diimpor + +# --- 1. KONFIGURASI & HYPERPARAMETERS --- +DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu' +DATA_DIR = 'data' +OUTPUT_DIR = 'outputs/models' +IMAGE_SIZE = 224 +BATCH_SIZE = 8 +EPOCHS = 30 +LEARNING_RATE_HEAD = 1e-3 +LEARNING_RATE_FINETUNE = 3e-5 +WEIGHT_DECAY = 0.01 +MODEL_NAME = 'best_vit_model_balanced.pth' +NUM_CLASSES = 4 + +# --- 2. 
FUNGSI TRAINING & VALIDASI (Tidak ada perubahan di sini) --- +def train_one_epoch(model, dataloader, optimizer, criterion, device): + model.train() + running_loss = 0.0 + correct_predictions = 0 + total_samples = 0 + + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Training"): + images, labels = images.to(device), labels.to(device) + optimizer.zero_grad() + outputs = model(images) + loss = criterion(outputs, labels) + loss.backward() + optimizer.step() + + running_loss += loss.item() * images.size(0) + _, preds = torch.max(outputs, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + return epoch_loss, epoch_acc + +def validate_one_epoch(model, dataloader, criterion, device): + model.eval() + running_loss = 0.0 + correct_predictions = 0 + total_samples = 0 + all_preds, all_labels = [], [] + + with torch.no_grad(): + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Validating"): + images, labels = images.to(device), labels.to(device) + outputs = model(images) + loss = criterion(outputs, labels) + + running_loss += loss.item() * images.size(0) + _, preds = torch.max(outputs, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + all_preds.extend(preds.cpu().numpy()) + all_labels.extend(labels.cpu().numpy()) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + + print("\n--- Laporan Klasifikasi Validasi ---") + print(classification_report(all_labels, all_preds, target_names=[str(i) for i in range(NUM_CLASSES)], zero_division=0)) + + return epoch_loss, epoch_acc, all_labels, all_preds + +# --- 3. SCRIPT UTAMA --- +if __name__ == '__main__': + # A. 
Persiapan Data & Implementasi WeightedRandomSampler + # Panggil get_dataloaders untuk mendapatkan dataset latih + _, valid_loader, classes, train_dataset = get_dataloaders(DATA_DIR, BATCH_SIZE, IMAGE_SIZE) + + print("\n--- Menyiapkan Balanced Sampler untuk Training ---") + # Hitung bobot untuk setiap sampel (bukan per kelas) + class_counts = np.bincount(train_dataset.targets) + class_weights = 1. / torch.tensor(class_counts, dtype=torch.float) + sample_weights = class_weights[train_dataset.targets] + criterion = FocalLoss() + # Buat Sampler yang akan menyeimbangkan pengambilan data + sampler = WeightedRandomSampler(weights=sample_weights, num_samples=len(sample_weights), replacement=True) + + # Buat DataLoader training BARU yang menggunakan sampler + train_loader = DataLoader( + train_dataset, + batch_size=BATCH_SIZE, + sampler=sampler, + num_workers=4 # Sesuaikan dengan kemampuan CPU Anda + ) + print("Balanced Sampler berhasil dibuat. Setiap batch training sekarang akan seimbang.") + + # B. Arsitektur & Pretraining + model = RetinaClassifier(num_classes=len(dataset_classes)).to(DEVICE) + + # C. Menggunakan Focal Loss + # Ganti dari CrossEntropyLoss menjadi FocalLoss + criterion = FocalLoss(alpha=[1, 1, 1, 2], gamma=2).to(DEVICE) + print("Menggunakan Focal Loss untuk fokus pada sampel yang sulit.") + + # D. 
Strategi Fine-tune + # --- TAHAP 1: FREEZE BACKBONE, LATIH HEAD --- + print("\n--- TAHAP 1: Melatih Classifier Head ---") + for param in model.parameters(): + param.requires_grad = False + for param in model.head.parameters(): + param.requires_grad = True + + optimizer_head = optim.AdamW(model.head.parameters(), lr=LEARNING_RATE_HEAD, weight_decay=WEIGHT_DECAY) + + for epoch in range(5): + print(f"Epoch Head {epoch+1}/5") + train_one_epoch(model, train_loader, optimizer_head, criterion, DEVICE) + validate_one_epoch(model, valid_loader, criterion, DEVICE) + + # --- TAHAP 2: UNFREEZE & FINE-TUNE SELURUH MODEL --- + print("\n--- TAHAP 2: Fine-tuning Seluruh Model ---") + for param in model.parameters(): + param.requires_grad = True + + optimizer_finetune = optim.AdamW(model.parameters(), lr=LEARNING_RATE_FINETUNE, weight_decay=WEIGHT_DECAY) + scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer_finetune, T_max=EPOCHS, eta_min=1e-7) + + history = {'train_loss': [], 'train_acc': [], 'valid_loss': [], 'valid_acc': []} + best_valid_acc = 0.0 + best_labels, best_preds = None, None + + for epoch in range(EPOCHS): + print(f"Epoch {epoch+1}/{EPOCHS}") + + train_loss, train_acc = train_one_epoch(model, train_loader, optimizer_finetune, criterion, DEVICE) + valid_loss, valid_acc, valid_labels, valid_preds = validate_one_epoch(model, valid_loader, criterion, DEVICE) + + scheduler.step() + + print(f" Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}") + print(f" Valid Loss: {valid_loss:.4f}, Valid Acc: {valid_acc:.4f}") + + history['train_loss'].append(train_loss) + history['train_acc'].append(train_acc) + history['valid_loss'].append(valid_loss) + history['valid_acc'].append(valid_acc) + + if valid_acc > best_valid_acc: + print(f"Validasi akurasi meningkat dari {best_valid_acc:.4f} ke {valid_acc:.4f}. 
Menyimpan model...") + save_model(epoch, model, optimizer_finetune, criterion, f"{OUTPUT_DIR}/{MODEL_NAME}") + best_valid_acc = valid_acc + best_labels = valid_labels + best_preds = valid_preds + + # E. Evaluasi & Reporting + save_plots(history['train_acc'], history['valid_acc'], history['train_loss'], history['valid_loss'], OUTPUT_DIR) + + if best_labels and best_preds: + save_confusion_matrix(best_labels, best_preds, classes, f"{OUTPUT_DIR}/confusion_matrix.png") + + print("\n--- Selesai ---") + print(f"Model terbaik disimpan di {OUTPUT_DIR}/{MODEL_NAME}") \ No newline at end of file diff --git a/.history/src/train_20250927160911.py b/.history/src/train_20250927160911.py new file mode 100644 index 0000000000000000000000000000000000000000..1c7ab1be849944ad64646b3e9ec8b4c1868fcf32 --- /dev/null +++ b/.history/src/train_20250927160911.py @@ -0,0 +1,173 @@ +# src/train.py (Versi Final dengan Balanced Sampler & Focal Loss) + +import torch +import torch.nn as nn +import torch.optim as optim +from tqdm import tqdm +import numpy as np +from sklearn.metrics import classification_report +from torch.utils.data import DataLoader, WeightedRandomSampler +from utils import save_model, save_plots, save_confusion_matrix, FocalLoss + +# Impor dari file lain dalam proyek +from model import RetinaClassifier +# from model import create_model +from dataset import get_dataloaders +from utils import save_model, save_plots, save_confusion_matrix, FocalLoss # Pastikan FocalLoss diimpor + +# --- 1. KONFIGURASI & HYPERPARAMETERS --- +DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu' +DATA_DIR = 'data' +OUTPUT_DIR = 'outputs/models' +IMAGE_SIZE = 224 +BATCH_SIZE = 8 +EPOCHS = 30 +LEARNING_RATE_HEAD = 1e-3 +LEARNING_RATE_FINETUNE = 3e-5 +WEIGHT_DECAY = 0.01 +MODEL_NAME = 'best_vit_model_balanced.pth' +NUM_CLASSES = 4 + +# --- 2. 
FUNGSI TRAINING & VALIDASI (Tidak ada perubahan di sini) --- +def train_one_epoch(model, dataloader, optimizer, criterion, device): + model.train() + running_loss = 0.0 + correct_predictions = 0 + total_samples = 0 + + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Training"): + images, labels = images.to(device), labels.to(device) + optimizer.zero_grad() + outputs = model(images) + loss = criterion(outputs, labels) + loss.backward() + optimizer.step() + + running_loss += loss.item() * images.size(0) + _, preds = torch.max(outputs, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + return epoch_loss, epoch_acc + +def validate_one_epoch(model, dataloader, criterion, device): + model.eval() + running_loss = 0.0 + correct_predictions = 0 + total_samples = 0 + all_preds, all_labels = [], [] + + with torch.no_grad(): + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Validating"): + images, labels = images.to(device), labels.to(device) + outputs = model(images) + loss = criterion(outputs, labels) + + running_loss += loss.item() * images.size(0) + _, preds = torch.max(outputs, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + all_preds.extend(preds.cpu().numpy()) + all_labels.extend(labels.cpu().numpy()) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + + print("\n--- Laporan Klasifikasi Validasi ---") + print(classification_report(all_labels, all_preds, target_names=[str(i) for i in range(NUM_CLASSES)], zero_division=0)) + + return epoch_loss, epoch_acc, all_labels, all_preds + +# --- 3. SCRIPT UTAMA --- +if __name__ == '__main__': + # A. 
Persiapan Data & Implementasi WeightedRandomSampler + # Panggil get_dataloaders untuk mendapatkan dataset latih + _, valid_loader, classes, train_dataset = get_dataloaders(DATA_DIR, BATCH_SIZE, IMAGE_SIZE) + + print("\n--- Menyiapkan Balanced Sampler untuk Training ---") + # Hitung bobot untuk setiap sampel (bukan per kelas) + class_counts = np.bincount(train_dataset.targets) + class_weights = 1. / torch.tensor(class_counts, dtype=torch.float) + sample_weights = class_weights[train_dataset.targets] + criterion = FocalLoss() + # Buat Sampler yang akan menyeimbangkan pengambilan data + sampler = WeightedRandomSampler(weights=sample_weights, num_samples=len(sample_weights), replacement=True) + + # Buat DataLoader training BARU yang menggunakan sampler + train_loader = DataLoader( + train_dataset, + batch_size=BATCH_SIZE, + sampler=sampler, + num_workers=4 # Sesuaikan dengan kemampuan CPU Anda + ) + print("Balanced Sampler berhasil dibuat. Setiap batch training sekarang akan seimbang.") + + # B. Arsitektur & Pretraining + model = RetinaClassifier(num_classes=len(dataset_classes)).to(DEVICE) + + # C. Menggunakan Focal Loss + # Ganti dari CrossEntropyLoss menjadi FocalLoss + criterion = FocalLoss(alpha=[1, 1, 1, 2], gamma=2).to(DEVICE) + print("Menggunakan Focal Loss untuk fokus pada sampel yang sulit.") + + # D. 
Strategi Fine-tune + # --- TAHAP 1: FREEZE BACKBONE, LATIH HEAD --- + print("\n--- TAHAP 1: Melatih Classifier Head ---") + for param in model.parameters(): + param.requires_grad = False + for param in model.head.parameters(): + param.requires_grad = True + + optimizer_head = optim.AdamW(model.head.parameters(), lr=LEARNING_RATE_HEAD, weight_decay=WEIGHT_DECAY) + + for epoch in range(5): + print(f"Epoch Head {epoch+1}/5") + train_one_epoch(model, train_loader, optimizer_head, criterion, DEVICE) + validate_one_epoch(model, valid_loader, criterion, DEVICE) + + # --- TAHAP 2: UNFREEZE & FINE-TUNE SELURUH MODEL --- + print("\n--- TAHAP 2: Fine-tuning Seluruh Model ---") + for param in model.parameters(): + param.requires_grad = True + + optimizer_finetune = optim.AdamW(model.parameters(), lr=LEARNING_RATE_FINETUNE, weight_decay=WEIGHT_DECAY) + scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer_finetune, T_max=EPOCHS, eta_min=1e-7) + + history = {'train_loss': [], 'train_acc': [], 'valid_loss': [], 'valid_acc': []} + best_valid_acc = 0.0 + best_labels, best_preds = None, None + + for epoch in range(EPOCHS): + print(f"Epoch {epoch+1}/{EPOCHS}") + + train_loss, train_acc = train_one_epoch(model, train_loader, optimizer_finetune, criterion, DEVICE) + valid_loss, valid_acc, valid_labels, valid_preds = validate_one_epoch(model, valid_loader, criterion, DEVICE) + + scheduler.step() + + print(f" Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}") + print(f" Valid Loss: {valid_loss:.4f}, Valid Acc: {valid_acc:.4f}") + + history['train_loss'].append(train_loss) + history['train_acc'].append(train_acc) + history['valid_loss'].append(valid_loss) + history['valid_acc'].append(valid_acc) + + if valid_acc > best_valid_acc: + print(f"Validasi akurasi meningkat dari {best_valid_acc:.4f} ke {valid_acc:.4f}. 
Menyimpan model...") + save_model(epoch, model, optimizer_finetune, criterion, f"{OUTPUT_DIR}/{MODEL_NAME}") + best_valid_acc = valid_acc + best_labels = valid_labels + best_preds = valid_preds + + # E. Evaluasi & Reporting + save_plots(history['train_acc'], history['valid_acc'], history['train_loss'], history['valid_loss'], OUTPUT_DIR) + + if best_labels and best_preds: + save_confusion_matrix(best_labels, best_preds, classes, f"{OUTPUT_DIR}/confusion_matrix.png") + + print("\n--- Selesai ---") + print(f"Model terbaik disimpan di {OUTPUT_DIR}/{MODEL_NAME}") \ No newline at end of file diff --git a/.history/src/train_20250927161104.py b/.history/src/train_20250927161104.py new file mode 100644 index 0000000000000000000000000000000000000000..83a0930dd86355dd47bd1aa8c9b9bd0a0f171b0f --- /dev/null +++ b/.history/src/train_20250927161104.py @@ -0,0 +1,174 @@ +# src/train.py (Versi Final dengan Balanced Sampler & Focal Loss) + +import torch +import torch.nn as nn +import torch.optim as optim +from tqdm import tqdm +import numpy as np +from sklearn.metrics import classification_report +from torch.utils.data import DataLoader, WeightedRandomSampler +from utils import save_model, save_plots, save_confusion_matrix, FocalLoss + +# Impor dari file lain dalam proyek +from model import RetinaClassifier +# from model import create_model +from dataset import get_dataloaders +from utils import save_model, save_plots, save_confusion_matrix, FocalLoss # Pastikan FocalLoss diimpor + +# --- 1. KONFIGURASI & HYPERPARAMETERS --- +DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu' +DATA_DIR = 'data' +OUTPUT_DIR = 'outputs/models' +IMAGE_SIZE = 224 +BATCH_SIZE = 8 +EPOCHS = 30 +LEARNING_RATE_HEAD = 1e-3 +LEARNING_RATE_FINETUNE = 3e-5 +WEIGHT_DECAY = 0.01 +MODEL_NAME = 'best_vit_model_balanced.pth' +NUM_CLASSES = 4 + +# --- 2. 
FUNGSI TRAINING & VALIDASI (Tidak ada perubahan di sini) --- +def train_one_epoch(model, dataloader, optimizer, criterion, device): + model.train() + running_loss = 0.0 + correct_predictions = 0 + total_samples = 0 + + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Training"): + images, labels = images.to(device), labels.to(device) + optimizer.zero_grad() + outputs = model(images) + loss = criterion(outputs, labels) + loss.backward() + optimizer.step() + + running_loss += loss.item() * images.size(0) + _, preds = torch.max(outputs, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + return epoch_loss, epoch_acc + +def validate_one_epoch(model, dataloader, criterion, device): + model.eval() + running_loss = 0.0 + correct_predictions = 0 + total_samples = 0 + all_preds, all_labels = [], [] + + with torch.no_grad(): + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Validating"): + images, labels = images.to(device), labels.to(device) + outputs = model(images) + loss = criterion(outputs, labels) + + running_loss += loss.item() * images.size(0) + _, preds = torch.max(outputs, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + all_preds.extend(preds.cpu().numpy()) + all_labels.extend(labels.cpu().numpy()) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + + print("\n--- Laporan Klasifikasi Validasi ---") + print(classification_report(all_labels, all_preds, target_names=[str(i) for i in range(NUM_CLASSES)], zero_division=0)) + + return epoch_loss, epoch_acc, all_labels, all_preds + +# --- 3. SCRIPT UTAMA --- +if __name__ == '__main__': + # A. 
Persiapan Data & Implementasi WeightedRandomSampler + # Panggil get_dataloaders untuk mendapatkan dataset latih + _, valid_loader, classes, train_dataset = get_dataloaders(DATA_DIR, BATCH_SIZE, IMAGE_SIZE) + + print("\n--- Menyiapkan Balanced Sampler untuk Training ---") + # Hitung bobot untuk setiap sampel (bukan per kelas) + class_counts = np.bincount(train_dataset.targets) + class_weights = 1. / torch.tensor(class_counts, dtype=torch.float) + sample_weights = class_weights[train_dataset.targets] + criterion = FocalLoss() + # Buat Sampler yang akan menyeimbangkan pengambilan data + sampler = WeightedRandomSampler(weights=sample_weights, num_samples=len(sample_weights), replacement=True) + + # Buat DataLoader training BARU yang menggunakan sampler + train_loader = DataLoader( + train_dataset, + batch_size=BATCH_SIZE, + sampler=sampler, + num_workers=4 # Sesuaikan dengan kemampuan CPU Anda + ) + print("Balanced Sampler berhasil dibuat. Setiap batch training sekarang akan seimbang.") + + # B. Arsitektur & Pretraining + model = RetinaClassifier(num_classes=len(classes)).to(DEVICE) + + + # C. Menggunakan Focal Loss + # Ganti dari CrossEntropyLoss menjadi FocalLoss + criterion = FocalLoss(alpha=[1, 1, 1, 2], gamma=2).to(DEVICE) + print("Menggunakan Focal Loss untuk fokus pada sampel yang sulit.") + + # D. 
Strategi Fine-tune + # --- TAHAP 1: FREEZE BACKBONE, LATIH HEAD --- + print("\n--- TAHAP 1: Melatih Classifier Head ---") + for param in model.parameters(): + param.requires_grad = False + for param in model.head.parameters(): + param.requires_grad = True + + optimizer_head = optim.AdamW(model.head.parameters(), lr=LEARNING_RATE_HEAD, weight_decay=WEIGHT_DECAY) + + for epoch in range(5): + print(f"Epoch Head {epoch+1}/5") + train_one_epoch(model, train_loader, optimizer_head, criterion, DEVICE) + validate_one_epoch(model, valid_loader, criterion, DEVICE) + + # --- TAHAP 2: UNFREEZE & FINE-TUNE SELURUH MODEL --- + print("\n--- TAHAP 2: Fine-tuning Seluruh Model ---") + for param in model.parameters(): + param.requires_grad = True + + optimizer_finetune = optim.AdamW(model.parameters(), lr=LEARNING_RATE_FINETUNE, weight_decay=WEIGHT_DECAY) + scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer_finetune, T_max=EPOCHS, eta_min=1e-7) + + history = {'train_loss': [], 'train_acc': [], 'valid_loss': [], 'valid_acc': []} + best_valid_acc = 0.0 + best_labels, best_preds = None, None + + for epoch in range(EPOCHS): + print(f"Epoch {epoch+1}/{EPOCHS}") + + train_loss, train_acc = train_one_epoch(model, train_loader, optimizer_finetune, criterion, DEVICE) + valid_loss, valid_acc, valid_labels, valid_preds = validate_one_epoch(model, valid_loader, criterion, DEVICE) + + scheduler.step() + + print(f" Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}") + print(f" Valid Loss: {valid_loss:.4f}, Valid Acc: {valid_acc:.4f}") + + history['train_loss'].append(train_loss) + history['train_acc'].append(train_acc) + history['valid_loss'].append(valid_loss) + history['valid_acc'].append(valid_acc) + + if valid_acc > best_valid_acc: + print(f"Validasi akurasi meningkat dari {best_valid_acc:.4f} ke {valid_acc:.4f}. 
Menyimpan model...") + save_model(epoch, model, optimizer_finetune, criterion, f"{OUTPUT_DIR}/{MODEL_NAME}") + best_valid_acc = valid_acc + best_labels = valid_labels + best_preds = valid_preds + + # E. Evaluasi & Reporting + save_plots(history['train_acc'], history['valid_acc'], history['train_loss'], history['valid_loss'], OUTPUT_DIR) + + if best_labels and best_preds: + save_confusion_matrix(best_labels, best_preds, classes, f"{OUTPUT_DIR}/confusion_matrix.png") + + print("\n--- Selesai ---") + print(f"Model terbaik disimpan di {OUTPUT_DIR}/{MODEL_NAME}") \ No newline at end of file diff --git a/.history/src/train_20250927161106.py b/.history/src/train_20250927161106.py new file mode 100644 index 0000000000000000000000000000000000000000..75875e2a10c5e2c5dc37267612aee2b128791ec3 --- /dev/null +++ b/.history/src/train_20250927161106.py @@ -0,0 +1,173 @@ +# src/train.py (Versi Final dengan Balanced Sampler & Focal Loss) + +import torch +import torch.nn as nn +import torch.optim as optim +from tqdm import tqdm +import numpy as np +from sklearn.metrics import classification_report +from torch.utils.data import DataLoader, WeightedRandomSampler +from utils import save_model, save_plots, save_confusion_matrix, FocalLoss + +# Impor dari file lain dalam proyek +from model import RetinaClassifier +# from model import create_model +from dataset import get_dataloaders +from utils import save_model, save_plots, save_confusion_matrix, FocalLoss # Pastikan FocalLoss diimpor + +# --- 1. KONFIGURASI & HYPERPARAMETERS --- +DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu' +DATA_DIR = 'data' +OUTPUT_DIR = 'outputs/models' +IMAGE_SIZE = 224 +BATCH_SIZE = 8 +EPOCHS = 30 +LEARNING_RATE_HEAD = 1e-3 +LEARNING_RATE_FINETUNE = 3e-5 +WEIGHT_DECAY = 0.01 +MODEL_NAME = 'best_vit_model_balanced.pth' +NUM_CLASSES = 4 + +# --- 2. 
FUNGSI TRAINING & VALIDASI (Tidak ada perubahan di sini) --- +def train_one_epoch(model, dataloader, optimizer, criterion, device): + model.train() + running_loss = 0.0 + correct_predictions = 0 + total_samples = 0 + + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Training"): + images, labels = images.to(device), labels.to(device) + optimizer.zero_grad() + outputs = model(images) + loss = criterion(outputs, labels) + loss.backward() + optimizer.step() + + running_loss += loss.item() * images.size(0) + _, preds = torch.max(outputs, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + return epoch_loss, epoch_acc + +def validate_one_epoch(model, dataloader, criterion, device): + model.eval() + running_loss = 0.0 + correct_predictions = 0 + total_samples = 0 + all_preds, all_labels = [], [] + + with torch.no_grad(): + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Validating"): + images, labels = images.to(device), labels.to(device) + outputs = model(images) + loss = criterion(outputs, labels) + + running_loss += loss.item() * images.size(0) + _, preds = torch.max(outputs, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + all_preds.extend(preds.cpu().numpy()) + all_labels.extend(labels.cpu().numpy()) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + + print("\n--- Laporan Klasifikasi Validasi ---") + print(classification_report(all_labels, all_preds, target_names=[str(i) for i in range(NUM_CLASSES)], zero_division=0)) + + return epoch_loss, epoch_acc, all_labels, all_preds + +# --- 3. SCRIPT UTAMA --- +if __name__ == '__main__': + # A. 
Persiapan Data & Implementasi WeightedRandomSampler + # Panggil get_dataloaders untuk mendapatkan dataset latih + _, valid_loader, classes, train_dataset = get_dataloaders(DATA_DIR, BATCH_SIZE, IMAGE_SIZE) + + print("\n--- Menyiapkan Balanced Sampler untuk Training ---") + # Hitung bobot untuk setiap sampel (bukan per kelas) + class_counts = np.bincount(train_dataset.targets) + class_weights = 1. / torch.tensor(class_counts, dtype=torch.float) + sample_weights = class_weights[train_dataset.targets] + criterion = FocalLoss() + # Buat Sampler yang akan menyeimbangkan pengambilan data + sampler = WeightedRandomSampler(weights=sample_weights, num_samples=len(sample_weights), replacement=True) + + # Buat DataLoader training BARU yang menggunakan sampler + train_loader = DataLoader( + train_dataset, + batch_size=BATCH_SIZE, + sampler=sampler, + num_workers=4 # Sesuaikan dengan kemampuan CPU Anda + ) + print("Balanced Sampler berhasil dibuat. Setiap batch training sekarang akan seimbang.") + + # B. Arsitektur & Pretraining + model = RetinaClassifier(num_classes=len(classes)).to(DEVICE) + + # C. Menggunakan Focal Loss + # Ganti dari CrossEntropyLoss menjadi FocalLoss + criterion = FocalLoss(alpha=[1, 1, 1, 2], gamma=2).to(DEVICE) + print("Menggunakan Focal Loss untuk fokus pada sampel yang sulit.") + + # D. 
Strategi Fine-tune + # --- TAHAP 1: FREEZE BACKBONE, LATIH HEAD --- + print("\n--- TAHAP 1: Melatih Classifier Head ---") + for param in model.parameters(): + param.requires_grad = False + for param in model.head.parameters(): + param.requires_grad = True + + optimizer_head = optim.AdamW(model.head.parameters(), lr=LEARNING_RATE_HEAD, weight_decay=WEIGHT_DECAY) + + for epoch in range(5): + print(f"Epoch Head {epoch+1}/5") + train_one_epoch(model, train_loader, optimizer_head, criterion, DEVICE) + validate_one_epoch(model, valid_loader, criterion, DEVICE) + + # --- TAHAP 2: UNFREEZE & FINE-TUNE SELURUH MODEL --- + print("\n--- TAHAP 2: Fine-tuning Seluruh Model ---") + for param in model.parameters(): + param.requires_grad = True + + optimizer_finetune = optim.AdamW(model.parameters(), lr=LEARNING_RATE_FINETUNE, weight_decay=WEIGHT_DECAY) + scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer_finetune, T_max=EPOCHS, eta_min=1e-7) + + history = {'train_loss': [], 'train_acc': [], 'valid_loss': [], 'valid_acc': []} + best_valid_acc = 0.0 + best_labels, best_preds = None, None + + for epoch in range(EPOCHS): + print(f"Epoch {epoch+1}/{EPOCHS}") + + train_loss, train_acc = train_one_epoch(model, train_loader, optimizer_finetune, criterion, DEVICE) + valid_loss, valid_acc, valid_labels, valid_preds = validate_one_epoch(model, valid_loader, criterion, DEVICE) + + scheduler.step() + + print(f" Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}") + print(f" Valid Loss: {valid_loss:.4f}, Valid Acc: {valid_acc:.4f}") + + history['train_loss'].append(train_loss) + history['train_acc'].append(train_acc) + history['valid_loss'].append(valid_loss) + history['valid_acc'].append(valid_acc) + + if valid_acc > best_valid_acc: + print(f"Validasi akurasi meningkat dari {best_valid_acc:.4f} ke {valid_acc:.4f}. 
Menyimpan model...") + save_model(epoch, model, optimizer_finetune, criterion, f"{OUTPUT_DIR}/{MODEL_NAME}") + best_valid_acc = valid_acc + best_labels = valid_labels + best_preds = valid_preds + + # E. Evaluasi & Reporting + save_plots(history['train_acc'], history['valid_acc'], history['train_loss'], history['valid_loss'], OUTPUT_DIR) + + if best_labels and best_preds: + save_confusion_matrix(best_labels, best_preds, classes, f"{OUTPUT_DIR}/confusion_matrix.png") + + print("\n--- Selesai ---") + print(f"Model terbaik disimpan di {OUTPUT_DIR}/{MODEL_NAME}") \ No newline at end of file diff --git a/.history/src/train_20250927161456.py b/.history/src/train_20250927161456.py new file mode 100644 index 0000000000000000000000000000000000000000..83bdd38c1a9c3e0739a63e80e937e755d2185d8f --- /dev/null +++ b/.history/src/train_20250927161456.py @@ -0,0 +1,177 @@ +# src/train.py (Versi Final dengan Balanced Sampler & Focal Loss) + +import torch +import torch.nn as nn +import torch.optim as optim +from tqdm import tqdm +import numpy as np +from sklearn.metrics import classification_report +from torch.utils.data import DataLoader, WeightedRandomSampler +from utils import save_model, save_plots, save_confusion_matrix, FocalLoss + +# Impor dari file lain dalam proyek +from model import RetinaClassifier +# from model import create_model +from dataset import get_dataloaders +from utils import save_model, save_plots, save_confusion_matrix, FocalLoss # Pastikan FocalLoss diimpor + +# --- 1. KONFIGURASI & HYPERPARAMETERS --- +DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu' +DATA_DIR = 'data' +OUTPUT_DIR = 'outputs/models' +IMAGE_SIZE = 224 +BATCH_SIZE = 8 +EPOCHS = 30 +LEARNING_RATE_HEAD = 1e-3 +LEARNING_RATE_FINETUNE = 3e-5 +WEIGHT_DECAY = 0.01 +MODEL_NAME = 'best_vit_model_balanced.pth' +NUM_CLASSES = 4 + +# --- 2. 
FUNGSI TRAINING & VALIDASI (Tidak ada perubahan di sini) --- +def train_one_epoch(model, dataloader, optimizer, criterion, device): + model.train() + running_loss = 0.0 + correct_predictions = 0 + total_samples = 0 + + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Training"): + images, labels = images.to(device), labels.to(device) + optimizer.zero_grad() + outputs = model(images) + loss = criterion(outputs, labels) + loss.backward() + optimizer.step() + + running_loss += loss.item() * images.size(0) + _, preds = torch.max(outputs, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + return epoch_loss, epoch_acc + +def validate_one_epoch(model, dataloader, criterion, device): + model.eval() + running_loss = 0.0 + correct_predictions = 0 + total_samples = 0 + all_preds, all_labels = [], [] + + with torch.no_grad(): + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Validating"): + images, labels = images.to(device), labels.to(device) + outputs = model(images) + loss = criterion(outputs, labels) + + running_loss += loss.item() * images.size(0) + _, preds = torch.max(outputs, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + all_preds.extend(preds.cpu().numpy()) + all_labels.extend(labels.cpu().numpy()) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + + print("\n--- Laporan Klasifikasi Validasi ---") + print(classification_report(all_labels, all_preds, target_names=[str(i) for i in range(NUM_CLASSES)], zero_division=0)) + + return epoch_loss, epoch_acc, all_labels, all_preds + +# --- 3. SCRIPT UTAMA --- +if __name__ == '__main__': + # A. 
Persiapan Data & Implementasi WeightedRandomSampler + # Panggil get_dataloaders untuk mendapatkan dataset latih + _, valid_loader, classes, train_dataset = get_dataloaders(DATA_DIR, BATCH_SIZE, IMAGE_SIZE) + + print("\n--- Menyiapkan Balanced Sampler untuk Training ---") + # Hitung bobot untuk setiap sampel (bukan per kelas) + class_counts = np.bincount(train_dataset.targets) + class_weights = 1. / torch.tensor(class_counts, dtype=torch.float) + sample_weights = class_weights[train_dataset.targets] + criterion = FocalLoss() + # Buat Sampler yang akan menyeimbangkan pengambilan data + sampler = WeightedRandomSampler(weights=sample_weights, num_samples=len(sample_weights), replacement=True) + + # Buat DataLoader training BARU yang menggunakan sampler + train_loader = DataLoader( + train_dataset, + batch_size=BATCH_SIZE, + sampler=sampler, + num_workers=4 # Sesuaikan dengan kemampuan CPU Anda + ) + print("Balanced Sampler berhasil dibuat. Setiap batch training sekarang akan seimbang.") + + # B. Arsitektur & Pretraining + model = RetinaClassifier(num_classes=len(classes)).to(DEVICE) + + # C. Menggunakan Focal Loss + # Ganti dari CrossEntropyLoss menjadi FocalLoss + criterion = FocalLoss(alpha=[1, 1, 1, 2], gamma=2).to(DEVICE) + print("Menggunakan Focal Loss untuk fokus pada sampel yang sulit.") + + # D. 
Strategi Fine-tune + # --- TAHAP 1: FREEZE BACKBONE, LATIH HEAD --- +# --- TAHAP 1: FREEZE BACKBONE, LATIH HEAD --- +print("\n--- TAHAP 1: Melatih Classifier Head ---") +for param in model.parameters(): + param.requires_grad = False + +# Ambil classifier layer dari backbone timm +classifier_module = model.model.get_classifier() +for param in classifier_module.parameters(): + param.requires_grad = True + +optimizer_head = optim.AdamW(classifier_module.parameters(), lr=LEARNING_RATE_HEAD, weight_decay=WEIGHT_DECAY) + +for epoch in range(5): + print(f"Epoch Head {epoch+1}/5") + train_one_epoch(model, train_loader, optimizer_head, criterion, DEVICE) + validate_one_epoch(model, valid_loader, criterion, DEVICE) + + # --- TAHAP 2: UNFREEZE & FINE-TUNE SELURUH MODEL --- + print("\n--- TAHAP 2: Fine-tuning Seluruh Model ---") + for param in model.parameters(): + param.requires_grad = True + + optimizer_finetune = optim.AdamW(model.parameters(), lr=LEARNING_RATE_FINETUNE, weight_decay=WEIGHT_DECAY) + scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer_finetune, T_max=EPOCHS, eta_min=1e-7) + + history = {'train_loss': [], 'train_acc': [], 'valid_loss': [], 'valid_acc': []} + best_valid_acc = 0.0 + best_labels, best_preds = None, None + + for epoch in range(EPOCHS): + print(f"Epoch {epoch+1}/{EPOCHS}") + + train_loss, train_acc = train_one_epoch(model, train_loader, optimizer_finetune, criterion, DEVICE) + valid_loss, valid_acc, valid_labels, valid_preds = validate_one_epoch(model, valid_loader, criterion, DEVICE) + + scheduler.step() + + print(f" Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}") + print(f" Valid Loss: {valid_loss:.4f}, Valid Acc: {valid_acc:.4f}") + + history['train_loss'].append(train_loss) + history['train_acc'].append(train_acc) + history['valid_loss'].append(valid_loss) + history['valid_acc'].append(valid_acc) + + if valid_acc > best_valid_acc: + print(f"Validasi akurasi meningkat dari {best_valid_acc:.4f} ke {valid_acc:.4f}. 
Menyimpan model...") + save_model(epoch, model, optimizer_finetune, criterion, f"{OUTPUT_DIR}/{MODEL_NAME}") + best_valid_acc = valid_acc + best_labels = valid_labels + best_preds = valid_preds + + # E. Evaluasi & Reporting + save_plots(history['train_acc'], history['valid_acc'], history['train_loss'], history['valid_loss'], OUTPUT_DIR) + + if best_labels and best_preds: + save_confusion_matrix(best_labels, best_preds, classes, f"{OUTPUT_DIR}/confusion_matrix.png") + + print("\n--- Selesai ---") + print(f"Model terbaik disimpan di {OUTPUT_DIR}/{MODEL_NAME}") \ No newline at end of file diff --git a/.history/src/train_20250927161500.py b/.history/src/train_20250927161500.py new file mode 100644 index 0000000000000000000000000000000000000000..c09aeb4985832b0bbb1ee69dda4bf7fe4b22f56b --- /dev/null +++ b/.history/src/train_20250927161500.py @@ -0,0 +1,177 @@ +# src/train.py (Versi Final dengan Balanced Sampler & Focal Loss) + +import torch +import torch.nn as nn +import torch.optim as optim +from tqdm import tqdm +import numpy as np +from sklearn.metrics import classification_report +from torch.utils.data import DataLoader, WeightedRandomSampler +from utils import save_model, save_plots, save_confusion_matrix, FocalLoss + +# Impor dari file lain dalam proyek +from model import RetinaClassifier +# from model import create_model +from dataset import get_dataloaders +from utils import save_model, save_plots, save_confusion_matrix, FocalLoss # Pastikan FocalLoss diimpor + +# --- 1. KONFIGURASI & HYPERPARAMETERS --- +DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu' +DATA_DIR = 'data' +OUTPUT_DIR = 'outputs/models' +IMAGE_SIZE = 224 +BATCH_SIZE = 8 +EPOCHS = 30 +LEARNING_RATE_HEAD = 1e-3 +LEARNING_RATE_FINETUNE = 3e-5 +WEIGHT_DECAY = 0.01 +MODEL_NAME = 'best_vit_model_balanced.pth' +NUM_CLASSES = 4 + +# --- 2. 
FUNGSI TRAINING & VALIDASI (Tidak ada perubahan di sini) --- +def train_one_epoch(model, dataloader, optimizer, criterion, device): + model.train() + running_loss = 0.0 + correct_predictions = 0 + total_samples = 0 + + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Training"): + images, labels = images.to(device), labels.to(device) + optimizer.zero_grad() + outputs = model(images) + loss = criterion(outputs, labels) + loss.backward() + optimizer.step() + + running_loss += loss.item() * images.size(0) + _, preds = torch.max(outputs, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + return epoch_loss, epoch_acc + +def validate_one_epoch(model, dataloader, criterion, device): + model.eval() + running_loss = 0.0 + correct_predictions = 0 + total_samples = 0 + all_preds, all_labels = [], [] + + with torch.no_grad(): + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Validating"): + images, labels = images.to(device), labels.to(device) + outputs = model(images) + loss = criterion(outputs, labels) + + running_loss += loss.item() * images.size(0) + _, preds = torch.max(outputs, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + all_preds.extend(preds.cpu().numpy()) + all_labels.extend(labels.cpu().numpy()) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + + print("\n--- Laporan Klasifikasi Validasi ---") + print(classification_report(all_labels, all_preds, target_names=[str(i) for i in range(NUM_CLASSES)], zero_division=0)) + + return epoch_loss, epoch_acc, all_labels, all_preds + +# --- 3. SCRIPT UTAMA --- +if __name__ == '__main__': + # A. 
Persiapan Data & Implementasi WeightedRandomSampler + # Panggil get_dataloaders untuk mendapatkan dataset latih + _, valid_loader, classes, train_dataset = get_dataloaders(DATA_DIR, BATCH_SIZE, IMAGE_SIZE) + + print("\n--- Menyiapkan Balanced Sampler untuk Training ---") + # Hitung bobot untuk setiap sampel (bukan per kelas) + class_counts = np.bincount(train_dataset.targets) + class_weights = 1. / torch.tensor(class_counts, dtype=torch.float) + sample_weights = class_weights[train_dataset.targets] + criterion = FocalLoss() + # Buat Sampler yang akan menyeimbangkan pengambilan data + sampler = WeightedRandomSampler(weights=sample_weights, num_samples=len(sample_weights), replacement=True) + + # Buat DataLoader training BARU yang menggunakan sampler + train_loader = DataLoader( + train_dataset, + batch_size=BATCH_SIZE, + sampler=sampler, + num_workers=4 # Sesuaikan dengan kemampuan CPU Anda + ) + print("Balanced Sampler berhasil dibuat. Setiap batch training sekarang akan seimbang.") + + # B. Arsitektur & Pretraining + model = RetinaClassifier(num_classes=len(classes)).to(DEVICE) + + # C. Menggunakan Focal Loss + # Ganti dari CrossEntropyLoss menjadi FocalLoss + criterion = FocalLoss(alpha=[1, 1, 1, 2], gamma=2).to(DEVICE) + print("Menggunakan Focal Loss untuk fokus pada sampel yang sulit.") + + # D. 
Strategi Fine-tune + # --- TAHAP 1: FREEZE BACKBONE, LATIH HEAD --- +# --- TAHAP 1: FREEZE BACKBONE, LATIH HEAD --- +print("\n--- TAHAP 1: Melatih Classifier Head ---") +for param in model.parameters(): + param.requires_grad = False + +# Ambil classifier layer dari backbone timm +classifier_module = model.model.get_classifier() +for param in classifier_module.parameters(): + param.requires_grad = True + + optimizer_head = optim.AdamW(classifier_module.parameters(), lr=LEARNING_RATE_HEAD, weight_decay=WEIGHT_DECAY) + +for epoch in range(5): + print(f"Epoch Head {epoch+1}/5") + train_one_epoch(model, train_loader, optimizer_head, criterion, DEVICE) + validate_one_epoch(model, valid_loader, criterion, DEVICE) + + # --- TAHAP 2: UNFREEZE & FINE-TUNE SELURUH MODEL --- + print("\n--- TAHAP 2: Fine-tuning Seluruh Model ---") + for param in model.parameters(): + param.requires_grad = True + + optimizer_finetune = optim.AdamW(model.parameters(), lr=LEARNING_RATE_FINETUNE, weight_decay=WEIGHT_DECAY) + scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer_finetune, T_max=EPOCHS, eta_min=1e-7) + + history = {'train_loss': [], 'train_acc': [], 'valid_loss': [], 'valid_acc': []} + best_valid_acc = 0.0 + best_labels, best_preds = None, None + + for epoch in range(EPOCHS): + print(f"Epoch {epoch+1}/{EPOCHS}") + + train_loss, train_acc = train_one_epoch(model, train_loader, optimizer_finetune, criterion, DEVICE) + valid_loss, valid_acc, valid_labels, valid_preds = validate_one_epoch(model, valid_loader, criterion, DEVICE) + + scheduler.step() + + print(f" Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}") + print(f" Valid Loss: {valid_loss:.4f}, Valid Acc: {valid_acc:.4f}") + + history['train_loss'].append(train_loss) + history['train_acc'].append(train_acc) + history['valid_loss'].append(valid_loss) + history['valid_acc'].append(valid_acc) + + if valid_acc > best_valid_acc: + print(f"Validasi akurasi meningkat dari {best_valid_acc:.4f} ke {valid_acc:.4f}. 
Menyimpan model...") + save_model(epoch, model, optimizer_finetune, criterion, f"{OUTPUT_DIR}/{MODEL_NAME}") + best_valid_acc = valid_acc + best_labels = valid_labels + best_preds = valid_preds + + # E. Evaluasi & Reporting + save_plots(history['train_acc'], history['valid_acc'], history['train_loss'], history['valid_loss'], OUTPUT_DIR) + + if best_labels and best_preds: + save_confusion_matrix(best_labels, best_preds, classes, f"{OUTPUT_DIR}/confusion_matrix.png") + + print("\n--- Selesai ---") + print(f"Model terbaik disimpan di {OUTPUT_DIR}/{MODEL_NAME}") \ No newline at end of file diff --git a/.history/src/train_20250927161505.py b/.history/src/train_20250927161505.py new file mode 100644 index 0000000000000000000000000000000000000000..3adfb8548f36654b407b387a8e33f71688613abc --- /dev/null +++ b/.history/src/train_20250927161505.py @@ -0,0 +1,177 @@ +# src/train.py (Versi Final dengan Balanced Sampler & Focal Loss) + +import torch +import torch.nn as nn +import torch.optim as optim +from tqdm import tqdm +import numpy as np +from sklearn.metrics import classification_report +from torch.utils.data import DataLoader, WeightedRandomSampler +from utils import save_model, save_plots, save_confusion_matrix, FocalLoss + +# Impor dari file lain dalam proyek +from model import RetinaClassifier +# from model import create_model +from dataset import get_dataloaders +from utils import save_model, save_plots, save_confusion_matrix, FocalLoss # Pastikan FocalLoss diimpor + +# --- 1. KONFIGURASI & HYPERPARAMETERS --- +DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu' +DATA_DIR = 'data' +OUTPUT_DIR = 'outputs/models' +IMAGE_SIZE = 224 +BATCH_SIZE = 8 +EPOCHS = 30 +LEARNING_RATE_HEAD = 1e-3 +LEARNING_RATE_FINETUNE = 3e-5 +WEIGHT_DECAY = 0.01 +MODEL_NAME = 'best_vit_model_balanced.pth' +NUM_CLASSES = 4 + +# --- 2. 
# TRAINING & VALIDATION FUNCTIONS (no changes here) ---
def train_one_epoch(model, dataloader, optimizer, criterion, device):
    """Run one optimisation pass over ``dataloader``.

    Args:
        model: Network called as ``model(images)``; switched to train mode.
        dataloader: Sized iterable yielding ``(images, labels)`` batches.
        optimizer: Optimizer whose gradients are reset and stepped per batch.
        criterion: Loss callable invoked as ``criterion(outputs, labels)``.
        device: Target passed to ``Tensor.to`` for every batch.

    Returns:
        tuple[float, float]: ``(epoch_loss, epoch_acc)`` averaged over all
        samples seen; ``(0.0, 0.0)`` when the loader yields no samples.
    """
    model.train()
    running_loss = 0.0
    correct_predictions = 0
    total_samples = 0

    for images, labels in tqdm(dataloader, total=len(dataloader), desc="Training"):
        images, labels = images.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # Weight the batch loss by batch size so a short final batch does not
        # skew the epoch average (assumes criterion returns a batch mean --
        # TODO confirm against FocalLoss).
        running_loss += loss.item() * images.size(0)
        preds = outputs.argmax(dim=1)
        # Accumulate as a Python int so the result is well-defined even when
        # the loop body never runs.
        correct_predictions += (preds == labels).sum().item()
        total_samples += labels.size(0)

    if total_samples == 0:
        # Empty loader: nothing to average, avoid dividing by zero.
        return 0.0, 0.0

    epoch_loss = running_loss / total_samples
    epoch_acc = correct_predictions / total_samples
    return epoch_loss, epoch_acc


def validate_one_epoch(model, dataloader, criterion, device):
    """Evaluate ``model`` on ``dataloader`` and print a per-class report.

    Args:
        model: Network called as ``model(images)``; switched to eval mode.
        dataloader: Sized iterable yielding ``(images, labels)`` batches.
        criterion: Loss callable invoked as ``criterion(outputs, labels)``.
        device: Target passed to ``Tensor.to`` for every batch.

    Returns:
        tuple: ``(epoch_loss, epoch_acc, all_labels, all_preds)`` where the
        two lists hold the true and predicted class index of every sample in
        iteration order. Loss and accuracy are ``0.0`` and the lists are
        empty when the loader yields no samples.
    """
    model.eval()
    running_loss = 0.0
    correct_predictions = 0
    total_samples = 0
    all_preds, all_labels = [], []

    with torch.no_grad():
        for images, labels in tqdm(dataloader, total=len(dataloader), desc="Validating"):
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            loss = criterion(outputs, labels)

            running_loss += loss.item() * images.size(0)
            preds = outputs.argmax(dim=1)
            correct_predictions += (preds == labels).sum().item()
            total_samples += labels.size(0)

            all_preds.extend(preds.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())

    if total_samples == 0:
        # Empty loader: no metrics to compute and no report to print.
        return 0.0, 0.0, all_labels, all_preds

    epoch_loss = running_loss / total_samples
    epoch_acc = correct_predictions / total_samples

    print("\n--- Laporan Klasifikasi Validasi ---")
    # Pass `labels` explicitly: without it scikit-learn infers the label set
    # from the data and raises ValueError whenever a class is absent from both
    # the targets and the predictions, because the count no longer matches
    # `target_names`.
    class_ids = list(range(NUM_CLASSES))
    print(classification_report(
        all_labels,
        all_preds,
        labels=class_ids,
        target_names=[str(i) for i in class_ids],
        zero_division=0,
    ))

    return epoch_loss, epoch_acc, all_labels, all_preds

# --- 3. SCRIPT UTAMA --- +if __name__ == '__main__': + # A. 
Persiapan Data & Implementasi WeightedRandomSampler + # Panggil get_dataloaders untuk mendapatkan dataset latih + _, valid_loader, classes, train_dataset = get_dataloaders(DATA_DIR, BATCH_SIZE, IMAGE_SIZE) + + print("\n--- Menyiapkan Balanced Sampler untuk Training ---") + # Hitung bobot untuk setiap sampel (bukan per kelas) + class_counts = np.bincount(train_dataset.targets) + class_weights = 1. / torch.tensor(class_counts, dtype=torch.float) + sample_weights = class_weights[train_dataset.targets] + criterion = FocalLoss() + # Buat Sampler yang akan menyeimbangkan pengambilan data + sampler = WeightedRandomSampler(weights=sample_weights, num_samples=len(sample_weights), replacement=True) + + # Buat DataLoader training BARU yang menggunakan sampler + train_loader = DataLoader( + train_dataset, + batch_size=BATCH_SIZE, + sampler=sampler, + num_workers=4 # Sesuaikan dengan kemampuan CPU Anda + ) + print("Balanced Sampler berhasil dibuat. Setiap batch training sekarang akan seimbang.") + + # B. Arsitektur & Pretraining + model = RetinaClassifier(num_classes=len(classes)).to(DEVICE) + + # C. Menggunakan Focal Loss + # Ganti dari CrossEntropyLoss menjadi FocalLoss + criterion = FocalLoss(alpha=[1, 1, 1, 2], gamma=2).to(DEVICE) + print("Menggunakan Focal Loss untuk fokus pada sampel yang sulit.") + + # D. 
Strategi Fine-tune + # --- TAHAP 1: FREEZE BACKBONE, LATIH HEAD --- +# --- TAHAP 1: FREEZE BACKBONE, LATIH HEAD --- +print("\n--- TAHAP 1: Melatih Classifier Head ---") +for param in model.parameters(): + param.requires_grad = False + +# Ambil classifier layer dari backbone timm + classifier_module = model.model.get_classifier() +for param in classifier_module.parameters(): + param.requires_grad = True + + optimizer_head = optim.AdamW(classifier_module.parameters(), lr=LEARNING_RATE_HEAD, weight_decay=WEIGHT_DECAY) + +for epoch in range(5): + print(f"Epoch Head {epoch+1}/5") + train_one_epoch(model, train_loader, optimizer_head, criterion, DEVICE) + validate_one_epoch(model, valid_loader, criterion, DEVICE) + + # --- TAHAP 2: UNFREEZE & FINE-TUNE SELURUH MODEL --- + print("\n--- TAHAP 2: Fine-tuning Seluruh Model ---") + for param in model.parameters(): + param.requires_grad = True + + optimizer_finetune = optim.AdamW(model.parameters(), lr=LEARNING_RATE_FINETUNE, weight_decay=WEIGHT_DECAY) + scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer_finetune, T_max=EPOCHS, eta_min=1e-7) + + history = {'train_loss': [], 'train_acc': [], 'valid_loss': [], 'valid_acc': []} + best_valid_acc = 0.0 + best_labels, best_preds = None, None + + for epoch in range(EPOCHS): + print(f"Epoch {epoch+1}/{EPOCHS}") + + train_loss, train_acc = train_one_epoch(model, train_loader, optimizer_finetune, criterion, DEVICE) + valid_loss, valid_acc, valid_labels, valid_preds = validate_one_epoch(model, valid_loader, criterion, DEVICE) + + scheduler.step() + + print(f" Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}") + print(f" Valid Loss: {valid_loss:.4f}, Valid Acc: {valid_acc:.4f}") + + history['train_loss'].append(train_loss) + history['train_acc'].append(train_acc) + history['valid_loss'].append(valid_loss) + history['valid_acc'].append(valid_acc) + + if valid_acc > best_valid_acc: + print(f"Validasi akurasi meningkat dari {best_valid_acc:.4f} ke {valid_acc:.4f}. 
Menyimpan model...") + save_model(epoch, model, optimizer_finetune, criterion, f"{OUTPUT_DIR}/{MODEL_NAME}") + best_valid_acc = valid_acc + best_labels = valid_labels + best_preds = valid_preds + + # E. Evaluasi & Reporting + save_plots(history['train_acc'], history['valid_acc'], history['train_loss'], history['valid_loss'], OUTPUT_DIR) + + if best_labels and best_preds: + save_confusion_matrix(best_labels, best_preds, classes, f"{OUTPUT_DIR}/confusion_matrix.png") + + print("\n--- Selesai ---") + print(f"Model terbaik disimpan di {OUTPUT_DIR}/{MODEL_NAME}") \ No newline at end of file diff --git a/.history/src/train_20250927161524.py b/.history/src/train_20250927161524.py new file mode 100644 index 0000000000000000000000000000000000000000..11b43782bec52b45fd9421cf05be6e4e43f5b1b0 --- /dev/null +++ b/.history/src/train_20250927161524.py @@ -0,0 +1,176 @@ +# src/train.py (Versi Final dengan Balanced Sampler & Focal Loss) + +import torch +import torch.nn as nn +import torch.optim as optim +from tqdm import tqdm +import numpy as np +from sklearn.metrics import classification_report +from torch.utils.data import DataLoader, WeightedRandomSampler +from utils import save_model, save_plots, save_confusion_matrix, FocalLoss + +# Impor dari file lain dalam proyek +from model import RetinaClassifier +# from model import create_model +from dataset import get_dataloaders +from utils import save_model, save_plots, save_confusion_matrix, FocalLoss # Pastikan FocalLoss diimpor + +# --- 1. KONFIGURASI & HYPERPARAMETERS --- +DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu' +DATA_DIR = 'data' +OUTPUT_DIR = 'outputs/models' +IMAGE_SIZE = 224 +BATCH_SIZE = 8 +EPOCHS = 30 +LEARNING_RATE_HEAD = 1e-3 +LEARNING_RATE_FINETUNE = 3e-5 +WEIGHT_DECAY = 0.01 +MODEL_NAME = 'best_vit_model_balanced.pth' +NUM_CLASSES = 4 + +# --- 2. 
# TRAINING & VALIDATION FUNCTIONS (no changes here) ---
def train_one_epoch(model, dataloader, optimizer, criterion, device):
    """Run one optimisation pass over ``dataloader``.

    Args:
        model: Network called as ``model(images)``; switched to train mode.
        dataloader: Sized iterable yielding ``(images, labels)`` batches.
        optimizer: Optimizer whose gradients are reset and stepped per batch.
        criterion: Loss callable invoked as ``criterion(outputs, labels)``.
        device: Target passed to ``Tensor.to`` for every batch.

    Returns:
        tuple[float, float]: ``(epoch_loss, epoch_acc)`` averaged over all
        samples seen; ``(0.0, 0.0)`` when the loader yields no samples.
    """
    model.train()
    running_loss = 0.0
    correct_predictions = 0
    total_samples = 0

    for images, labels in tqdm(dataloader, total=len(dataloader), desc="Training"):
        images, labels = images.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # Weight the batch loss by batch size so a short final batch does not
        # skew the epoch average (assumes criterion returns a batch mean --
        # TODO confirm against FocalLoss).
        running_loss += loss.item() * images.size(0)
        preds = outputs.argmax(dim=1)
        # Accumulate as a Python int so the result is well-defined even when
        # the loop body never runs.
        correct_predictions += (preds == labels).sum().item()
        total_samples += labels.size(0)

    if total_samples == 0:
        # Empty loader: nothing to average, avoid dividing by zero.
        return 0.0, 0.0

    epoch_loss = running_loss / total_samples
    epoch_acc = correct_predictions / total_samples
    return epoch_loss, epoch_acc


def validate_one_epoch(model, dataloader, criterion, device):
    """Evaluate ``model`` on ``dataloader`` and print a per-class report.

    Args:
        model: Network called as ``model(images)``; switched to eval mode.
        dataloader: Sized iterable yielding ``(images, labels)`` batches.
        criterion: Loss callable invoked as ``criterion(outputs, labels)``.
        device: Target passed to ``Tensor.to`` for every batch.

    Returns:
        tuple: ``(epoch_loss, epoch_acc, all_labels, all_preds)`` where the
        two lists hold the true and predicted class index of every sample in
        iteration order. Loss and accuracy are ``0.0`` and the lists are
        empty when the loader yields no samples.
    """
    model.eval()
    running_loss = 0.0
    correct_predictions = 0
    total_samples = 0
    all_preds, all_labels = [], []

    with torch.no_grad():
        for images, labels in tqdm(dataloader, total=len(dataloader), desc="Validating"):
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            loss = criterion(outputs, labels)

            running_loss += loss.item() * images.size(0)
            preds = outputs.argmax(dim=1)
            correct_predictions += (preds == labels).sum().item()
            total_samples += labels.size(0)

            all_preds.extend(preds.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())

    if total_samples == 0:
        # Empty loader: no metrics to compute and no report to print.
        return 0.0, 0.0, all_labels, all_preds

    epoch_loss = running_loss / total_samples
    epoch_acc = correct_predictions / total_samples

    print("\n--- Laporan Klasifikasi Validasi ---")
    # Pass `labels` explicitly: without it scikit-learn infers the label set
    # from the data and raises ValueError whenever a class is absent from both
    # the targets and the predictions, because the count no longer matches
    # `target_names`.
    class_ids = list(range(NUM_CLASSES))
    print(classification_report(
        all_labels,
        all_preds,
        labels=class_ids,
        target_names=[str(i) for i in class_ids],
        zero_division=0,
    ))

    return epoch_loss, epoch_acc, all_labels, all_preds

# --- 3. SCRIPT UTAMA --- +if __name__ == '__main__': + # A. 
Persiapan Data & Implementasi WeightedRandomSampler + # Panggil get_dataloaders untuk mendapatkan dataset latih + _, valid_loader, classes, train_dataset = get_dataloaders(DATA_DIR, BATCH_SIZE, IMAGE_SIZE) + + print("\n--- Menyiapkan Balanced Sampler untuk Training ---") + # Hitung bobot untuk setiap sampel (bukan per kelas) + class_counts = np.bincount(train_dataset.targets) + class_weights = 1. / torch.tensor(class_counts, dtype=torch.float) + sample_weights = class_weights[train_dataset.targets] + criterion = FocalLoss() + # Buat Sampler yang akan menyeimbangkan pengambilan data + sampler = WeightedRandomSampler(weights=sample_weights, num_samples=len(sample_weights), replacement=True) + + # Buat DataLoader training BARU yang menggunakan sampler + train_loader = DataLoader( + train_dataset, + batch_size=BATCH_SIZE, + sampler=sampler, + num_workers=4 # Sesuaikan dengan kemampuan CPU Anda + ) + print("Balanced Sampler berhasil dibuat. Setiap batch training sekarang akan seimbang.") + + # B. Arsitektur & Pretraining + model = RetinaClassifier(num_classes=len(classes)).to(DEVICE) + + # C. Menggunakan Focal Loss + # Ganti dari CrossEntropyLoss menjadi FocalLoss + criterion = FocalLoss(alpha=[1, 1, 1, 2], gamma=2).to(DEVICE) + print("Menggunakan Focal Loss untuk fokus pada sampel yang sulit.") + + # D. 
Strategi Fine-tune +# --- TAHAP 1: FREEZE BACKBONE, LATIH HEAD --- +print("\n--- TAHAP 1: Melatih Classifier Head ---") +for param in model.parameters(): + param.requires_grad = False + +# Ambil classifier layer dari backbone timm + classifier_module = model.model.get_classifier() +for param in classifier_module.parameters(): + param.requires_grad = True + + optimizer_head = optim.AdamW(classifier_module.parameters(), lr=LEARNING_RATE_HEAD, weight_decay=WEIGHT_DECAY) + +for epoch in range(5): + print(f"Epoch Head {epoch+1}/5") + train_one_epoch(model, train_loader, optimizer_head, criterion, DEVICE) + validate_one_epoch(model, valid_loader, criterion, DEVICE) + + # --- TAHAP 2: UNFREEZE & FINE-TUNE SELURUH MODEL --- + print("\n--- TAHAP 2: Fine-tuning Seluruh Model ---") + for param in model.parameters(): + param.requires_grad = True + + optimizer_finetune = optim.AdamW(model.parameters(), lr=LEARNING_RATE_FINETUNE, weight_decay=WEIGHT_DECAY) + scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer_finetune, T_max=EPOCHS, eta_min=1e-7) + + history = {'train_loss': [], 'train_acc': [], 'valid_loss': [], 'valid_acc': []} + best_valid_acc = 0.0 + best_labels, best_preds = None, None + + for epoch in range(EPOCHS): + print(f"Epoch {epoch+1}/{EPOCHS}") + + train_loss, train_acc = train_one_epoch(model, train_loader, optimizer_finetune, criterion, DEVICE) + valid_loss, valid_acc, valid_labels, valid_preds = validate_one_epoch(model, valid_loader, criterion, DEVICE) + + scheduler.step() + + print(f" Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}") + print(f" Valid Loss: {valid_loss:.4f}, Valid Acc: {valid_acc:.4f}") + + history['train_loss'].append(train_loss) + history['train_acc'].append(train_acc) + history['valid_loss'].append(valid_loss) + history['valid_acc'].append(valid_acc) + + if valid_acc > best_valid_acc: + print(f"Validasi akurasi meningkat dari {best_valid_acc:.4f} ke {valid_acc:.4f}. 
Menyimpan model...") + save_model(epoch, model, optimizer_finetune, criterion, f"{OUTPUT_DIR}/{MODEL_NAME}") + best_valid_acc = valid_acc + best_labels = valid_labels + best_preds = valid_preds + + # E. Evaluasi & Reporting + save_plots(history['train_acc'], history['valid_acc'], history['train_loss'], history['valid_loss'], OUTPUT_DIR) + + if best_labels and best_preds: + save_confusion_matrix(best_labels, best_preds, classes, f"{OUTPUT_DIR}/confusion_matrix.png") + + print("\n--- Selesai ---") + print(f"Model terbaik disimpan di {OUTPUT_DIR}/{MODEL_NAME}") \ No newline at end of file diff --git a/.history/src/train_20250927161601.py b/.history/src/train_20250927161601.py new file mode 100644 index 0000000000000000000000000000000000000000..6515a39e0e00cbb13711eb71434db70d584f90ae --- /dev/null +++ b/.history/src/train_20250927161601.py @@ -0,0 +1,178 @@ +# src/train.py (Versi Final dengan Balanced Sampler & Focal Loss) + +import torch +import torch.nn as nn +import torch.optim as optim +from tqdm import tqdm +import numpy as np +from sklearn.metrics import classification_report +from torch.utils.data import DataLoader, WeightedRandomSampler +from utils import save_model, save_plots, save_confusion_matrix, FocalLoss + +# Impor dari file lain dalam proyek +from model import RetinaClassifier +# from model import create_model +from dataset import get_dataloaders +from utils import save_model, save_plots, save_confusion_matrix, FocalLoss # Pastikan FocalLoss diimpor + +# --- 1. KONFIGURASI & HYPERPARAMETERS --- +DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu' +DATA_DIR = 'data' +OUTPUT_DIR = 'outputs/models' +IMAGE_SIZE = 224 +BATCH_SIZE = 8 +EPOCHS = 30 +LEARNING_RATE_HEAD = 1e-3 +LEARNING_RATE_FINETUNE = 3e-5 +WEIGHT_DECAY = 0.01 +MODEL_NAME = 'best_vit_model_balanced.pth' +NUM_CLASSES = 4 + +# --- 2. 
# TRAINING & VALIDATION FUNCTIONS (no changes here) ---
def train_one_epoch(model, dataloader, optimizer, criterion, device):
    """Run one optimisation pass over ``dataloader``.

    Args:
        model: Network called as ``model(images)``; switched to train mode.
        dataloader: Sized iterable yielding ``(images, labels)`` batches.
        optimizer: Optimizer whose gradients are reset and stepped per batch.
        criterion: Loss callable invoked as ``criterion(outputs, labels)``.
        device: Target passed to ``Tensor.to`` for every batch.

    Returns:
        tuple[float, float]: ``(epoch_loss, epoch_acc)`` averaged over all
        samples seen; ``(0.0, 0.0)`` when the loader yields no samples.
    """
    model.train()
    running_loss = 0.0
    correct_predictions = 0
    total_samples = 0

    for images, labels in tqdm(dataloader, total=len(dataloader), desc="Training"):
        images, labels = images.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # Weight the batch loss by batch size so a short final batch does not
        # skew the epoch average (assumes criterion returns a batch mean --
        # TODO confirm against FocalLoss).
        running_loss += loss.item() * images.size(0)
        preds = outputs.argmax(dim=1)
        # Accumulate as a Python int so the result is well-defined even when
        # the loop body never runs.
        correct_predictions += (preds == labels).sum().item()
        total_samples += labels.size(0)

    if total_samples == 0:
        # Empty loader: nothing to average, avoid dividing by zero.
        return 0.0, 0.0

    epoch_loss = running_loss / total_samples
    epoch_acc = correct_predictions / total_samples
    return epoch_loss, epoch_acc


def validate_one_epoch(model, dataloader, criterion, device):
    """Evaluate ``model`` on ``dataloader`` and print a per-class report.

    Args:
        model: Network called as ``model(images)``; switched to eval mode.
        dataloader: Sized iterable yielding ``(images, labels)`` batches.
        criterion: Loss callable invoked as ``criterion(outputs, labels)``.
        device: Target passed to ``Tensor.to`` for every batch.

    Returns:
        tuple: ``(epoch_loss, epoch_acc, all_labels, all_preds)`` where the
        two lists hold the true and predicted class index of every sample in
        iteration order. Loss and accuracy are ``0.0`` and the lists are
        empty when the loader yields no samples.
    """
    model.eval()
    running_loss = 0.0
    correct_predictions = 0
    total_samples = 0
    all_preds, all_labels = [], []

    with torch.no_grad():
        for images, labels in tqdm(dataloader, total=len(dataloader), desc="Validating"):
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            loss = criterion(outputs, labels)

            running_loss += loss.item() * images.size(0)
            preds = outputs.argmax(dim=1)
            correct_predictions += (preds == labels).sum().item()
            total_samples += labels.size(0)

            all_preds.extend(preds.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())

    if total_samples == 0:
        # Empty loader: no metrics to compute and no report to print.
        return 0.0, 0.0, all_labels, all_preds

    epoch_loss = running_loss / total_samples
    epoch_acc = correct_predictions / total_samples

    print("\n--- Laporan Klasifikasi Validasi ---")
    # Pass `labels` explicitly: without it scikit-learn infers the label set
    # from the data and raises ValueError whenever a class is absent from both
    # the targets and the predictions, because the count no longer matches
    # `target_names`.
    class_ids = list(range(NUM_CLASSES))
    print(classification_report(
        all_labels,
        all_preds,
        labels=class_ids,
        target_names=[str(i) for i in class_ids],
        zero_division=0,
    ))

    return epoch_loss, epoch_acc, all_labels, all_preds

# --- 3. SCRIPT UTAMA --- +if __name__ == '__main__': + # A. 
Persiapan Data & Implementasi WeightedRandomSampler + # Panggil get_dataloaders untuk mendapatkan dataset latih + _, valid_loader, classes, train_dataset = get_dataloaders(DATA_DIR, BATCH_SIZE, IMAGE_SIZE) + + print("\n--- Menyiapkan Balanced Sampler untuk Training ---") + # Hitung bobot untuk setiap sampel (bukan per kelas) + class_counts = np.bincount(train_dataset.targets) + class_weights = 1. / torch.tensor(class_counts, dtype=torch.float) + sample_weights = class_weights[train_dataset.targets] + criterion = FocalLoss() + # Buat Sampler yang akan menyeimbangkan pengambilan data + sampler = WeightedRandomSampler(weights=sample_weights, num_samples=len(sample_weights), replacement=True) + + # Buat DataLoader training BARU yang menggunakan sampler + train_loader = DataLoader( + train_dataset, + batch_size=BATCH_SIZE, + sampler=sampler, + num_workers=4 # Sesuaikan dengan kemampuan CPU Anda + ) + print("Balanced Sampler berhasil dibuat. Setiap batch training sekarang akan seimbang.") + + # B. Arsitektur & Pretraining + model = RetinaClassifier(num_classes=len(classes)).to(DEVICE) + + # C. Menggunakan Focal Loss + # Ganti dari CrossEntropyLoss menjadi FocalLoss + criterion = FocalLoss(alpha=[1, 1, 1, 2], gamma=2).to(DEVICE) + print("Menggunakan Focal Loss untuk fokus pada sampel yang sulit.") + + # D. 
Strategi Fine-tune +# --- TAHAP 1: FREEZE BACKBONE, LATIH HEAD --- +# --- TAHAP 1: FREEZE BACKBONE, LATIH HEAD --- +print("\n--- TAHAP 1: Melatih Classifier Head ---") +for param in model.parameters(): + param.requires_grad = False + +# Ambil classifier layer dari backbone timm +classifier_module = model.backbone.get_classifier() +for param in classifier_module.parameters(): + param.requires_grad = True + +optimizer_head = optim.AdamW(classifier_module.parameters(), lr=LEARNING_RATE_HEAD, weight_decay=WEIGHT_DECAY) + +for epoch in range(5): + print(f"Epoch Head {epoch+1}/5") + train_one_epoch(model, train_loader, optimizer_head, criterion, DEVICE) + validate_one_epoch(model, valid_loader, criterion, DEVICE) + + + # --- TAHAP 2: UNFREEZE & FINE-TUNE SELURUH MODEL --- + print("\n--- TAHAP 2: Fine-tuning Seluruh Model ---") + for param in model.parameters(): + param.requires_grad = True + + optimizer_finetune = optim.AdamW(model.parameters(), lr=LEARNING_RATE_FINETUNE, weight_decay=WEIGHT_DECAY) + scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer_finetune, T_max=EPOCHS, eta_min=1e-7) + + history = {'train_loss': [], 'train_acc': [], 'valid_loss': [], 'valid_acc': []} + best_valid_acc = 0.0 + best_labels, best_preds = None, None + + for epoch in range(EPOCHS): + print(f"Epoch {epoch+1}/{EPOCHS}") + + train_loss, train_acc = train_one_epoch(model, train_loader, optimizer_finetune, criterion, DEVICE) + valid_loss, valid_acc, valid_labels, valid_preds = validate_one_epoch(model, valid_loader, criterion, DEVICE) + + scheduler.step() + + print(f" Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}") + print(f" Valid Loss: {valid_loss:.4f}, Valid Acc: {valid_acc:.4f}") + + history['train_loss'].append(train_loss) + history['train_acc'].append(train_acc) + history['valid_loss'].append(valid_loss) + history['valid_acc'].append(valid_acc) + + if valid_acc > best_valid_acc: + print(f"Validasi akurasi meningkat dari {best_valid_acc:.4f} ke {valid_acc:.4f}. 
Menyimpan model...") + save_model(epoch, model, optimizer_finetune, criterion, f"{OUTPUT_DIR}/{MODEL_NAME}") + best_valid_acc = valid_acc + best_labels = valid_labels + best_preds = valid_preds + + # E. Evaluasi & Reporting + save_plots(history['train_acc'], history['valid_acc'], history['train_loss'], history['valid_loss'], OUTPUT_DIR) + + if best_labels and best_preds: + save_confusion_matrix(best_labels, best_preds, classes, f"{OUTPUT_DIR}/confusion_matrix.png") + + print("\n--- Selesai ---") + print(f"Model terbaik disimpan di {OUTPUT_DIR}/{MODEL_NAME}") \ No newline at end of file diff --git a/.history/src/train_20250927161604.py b/.history/src/train_20250927161604.py new file mode 100644 index 0000000000000000000000000000000000000000..00bdf743db276d8be5c9b5aa95f7adc73b202500 --- /dev/null +++ b/.history/src/train_20250927161604.py @@ -0,0 +1,178 @@ +# src/train.py (Versi Final dengan Balanced Sampler & Focal Loss) + +import torch +import torch.nn as nn +import torch.optim as optim +from tqdm import tqdm +import numpy as np +from sklearn.metrics import classification_report +from torch.utils.data import DataLoader, WeightedRandomSampler +from utils import save_model, save_plots, save_confusion_matrix, FocalLoss + +# Impor dari file lain dalam proyek +from model import RetinaClassifier +# from model import create_model +from dataset import get_dataloaders +from utils import save_model, save_plots, save_confusion_matrix, FocalLoss # Pastikan FocalLoss diimpor + +# --- 1. KONFIGURASI & HYPERPARAMETERS --- +DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu' +DATA_DIR = 'data' +OUTPUT_DIR = 'outputs/models' +IMAGE_SIZE = 224 +BATCH_SIZE = 8 +EPOCHS = 30 +LEARNING_RATE_HEAD = 1e-3 +LEARNING_RATE_FINETUNE = 3e-5 +WEIGHT_DECAY = 0.01 +MODEL_NAME = 'best_vit_model_balanced.pth' +NUM_CLASSES = 4 + +# --- 2. 
# TRAINING & VALIDATION FUNCTIONS (no changes here) ---
def train_one_epoch(model, dataloader, optimizer, criterion, device):
    """Run one optimisation pass over ``dataloader``.

    Args:
        model: Network called as ``model(images)``; switched to train mode.
        dataloader: Sized iterable yielding ``(images, labels)`` batches.
        optimizer: Optimizer whose gradients are reset and stepped per batch.
        criterion: Loss callable invoked as ``criterion(outputs, labels)``.
        device: Target passed to ``Tensor.to`` for every batch.

    Returns:
        tuple[float, float]: ``(epoch_loss, epoch_acc)`` averaged over all
        samples seen; ``(0.0, 0.0)`` when the loader yields no samples.
    """
    model.train()
    running_loss = 0.0
    correct_predictions = 0
    total_samples = 0

    for images, labels in tqdm(dataloader, total=len(dataloader), desc="Training"):
        images, labels = images.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # Weight the batch loss by batch size so a short final batch does not
        # skew the epoch average (assumes criterion returns a batch mean --
        # TODO confirm against FocalLoss).
        running_loss += loss.item() * images.size(0)
        preds = outputs.argmax(dim=1)
        # Accumulate as a Python int so the result is well-defined even when
        # the loop body never runs.
        correct_predictions += (preds == labels).sum().item()
        total_samples += labels.size(0)

    if total_samples == 0:
        # Empty loader: nothing to average, avoid dividing by zero.
        return 0.0, 0.0

    epoch_loss = running_loss / total_samples
    epoch_acc = correct_predictions / total_samples
    return epoch_loss, epoch_acc


def validate_one_epoch(model, dataloader, criterion, device):
    """Evaluate ``model`` on ``dataloader`` and print a per-class report.

    Args:
        model: Network called as ``model(images)``; switched to eval mode.
        dataloader: Sized iterable yielding ``(images, labels)`` batches.
        criterion: Loss callable invoked as ``criterion(outputs, labels)``.
        device: Target passed to ``Tensor.to`` for every batch.

    Returns:
        tuple: ``(epoch_loss, epoch_acc, all_labels, all_preds)`` where the
        two lists hold the true and predicted class index of every sample in
        iteration order. Loss and accuracy are ``0.0`` and the lists are
        empty when the loader yields no samples.
    """
    model.eval()
    running_loss = 0.0
    correct_predictions = 0
    total_samples = 0
    all_preds, all_labels = [], []

    with torch.no_grad():
        for images, labels in tqdm(dataloader, total=len(dataloader), desc="Validating"):
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            loss = criterion(outputs, labels)

            running_loss += loss.item() * images.size(0)
            preds = outputs.argmax(dim=1)
            correct_predictions += (preds == labels).sum().item()
            total_samples += labels.size(0)

            all_preds.extend(preds.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())

    if total_samples == 0:
        # Empty loader: no metrics to compute and no report to print.
        return 0.0, 0.0, all_labels, all_preds

    epoch_loss = running_loss / total_samples
    epoch_acc = correct_predictions / total_samples

    print("\n--- Laporan Klasifikasi Validasi ---")
    # Pass `labels` explicitly: without it scikit-learn infers the label set
    # from the data and raises ValueError whenever a class is absent from both
    # the targets and the predictions, because the count no longer matches
    # `target_names`.
    class_ids = list(range(NUM_CLASSES))
    print(classification_report(
        all_labels,
        all_preds,
        labels=class_ids,
        target_names=[str(i) for i in class_ids],
        zero_division=0,
    ))

    return epoch_loss, epoch_acc, all_labels, all_preds

# --- 3. SCRIPT UTAMA --- +if __name__ == '__main__': + # A. 
Persiapan Data & Implementasi WeightedRandomSampler + # Panggil get_dataloaders untuk mendapatkan dataset latih + _, valid_loader, classes, train_dataset = get_dataloaders(DATA_DIR, BATCH_SIZE, IMAGE_SIZE) + + print("\n--- Menyiapkan Balanced Sampler untuk Training ---") + # Hitung bobot untuk setiap sampel (bukan per kelas) + class_counts = np.bincount(train_dataset.targets) + class_weights = 1. / torch.tensor(class_counts, dtype=torch.float) + sample_weights = class_weights[train_dataset.targets] + criterion = FocalLoss() + # Buat Sampler yang akan menyeimbangkan pengambilan data + sampler = WeightedRandomSampler(weights=sample_weights, num_samples=len(sample_weights), replacement=True) + + # Buat DataLoader training BARU yang menggunakan sampler + train_loader = DataLoader( + train_dataset, + batch_size=BATCH_SIZE, + sampler=sampler, + num_workers=4 # Sesuaikan dengan kemampuan CPU Anda + ) + print("Balanced Sampler berhasil dibuat. Setiap batch training sekarang akan seimbang.") + + # B. Arsitektur & Pretraining + model = RetinaClassifier(num_classes=len(classes)).to(DEVICE) + + # C. Menggunakan Focal Loss + # Ganti dari CrossEntropyLoss menjadi FocalLoss + criterion = FocalLoss(alpha=[1, 1, 1, 2], gamma=2).to(DEVICE) + print("Menggunakan Focal Loss untuk fokus pada sampel yang sulit.") + + # D. 
Strategi Fine-tune +# --- TAHAP 1: FREEZE BACKBONE, LATIH HEAD --- +# --- TAHAP 1: FREEZE BACKBONE, LATIH HEAD --- +print("\n--- TAHAP 1: Melatih Classifier Head ---") +for param in model.parameters(): + param.requires_grad = False + +# Ambil classifier layer dari backbone timm +classifier_module = model.backbone.get_classifier() +for param in classifier_module.parameters(): + param.requires_grad = True + + optimizer_head = optim.AdamW(classifier_module.parameters(), lr=LEARNING_RATE_HEAD, weight_decay=WEIGHT_DECAY) + +for epoch in range(5): + print(f"Epoch Head {epoch+1}/5") + train_one_epoch(model, train_loader, optimizer_head, criterion, DEVICE) + validate_one_epoch(model, valid_loader, criterion, DEVICE) + + + # --- TAHAP 2: UNFREEZE & FINE-TUNE SELURUH MODEL --- + print("\n--- TAHAP 2: Fine-tuning Seluruh Model ---") + for param in model.parameters(): + param.requires_grad = True + + optimizer_finetune = optim.AdamW(model.parameters(), lr=LEARNING_RATE_FINETUNE, weight_decay=WEIGHT_DECAY) + scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer_finetune, T_max=EPOCHS, eta_min=1e-7) + + history = {'train_loss': [], 'train_acc': [], 'valid_loss': [], 'valid_acc': []} + best_valid_acc = 0.0 + best_labels, best_preds = None, None + + for epoch in range(EPOCHS): + print(f"Epoch {epoch+1}/{EPOCHS}") + + train_loss, train_acc = train_one_epoch(model, train_loader, optimizer_finetune, criterion, DEVICE) + valid_loss, valid_acc, valid_labels, valid_preds = validate_one_epoch(model, valid_loader, criterion, DEVICE) + + scheduler.step() + + print(f" Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}") + print(f" Valid Loss: {valid_loss:.4f}, Valid Acc: {valid_acc:.4f}") + + history['train_loss'].append(train_loss) + history['train_acc'].append(train_acc) + history['valid_loss'].append(valid_loss) + history['valid_acc'].append(valid_acc) + + if valid_acc > best_valid_acc: + print(f"Validasi akurasi meningkat dari {best_valid_acc:.4f} ke {valid_acc:.4f}. 
Menyimpan model...") + save_model(epoch, model, optimizer_finetune, criterion, f"{OUTPUT_DIR}/{MODEL_NAME}") + best_valid_acc = valid_acc + best_labels = valid_labels + best_preds = valid_preds + + # E. Evaluasi & Reporting + save_plots(history['train_acc'], history['valid_acc'], history['train_loss'], history['valid_loss'], OUTPUT_DIR) + + if best_labels and best_preds: + save_confusion_matrix(best_labels, best_preds, classes, f"{OUTPUT_DIR}/confusion_matrix.png") + + print("\n--- Selesai ---") + print(f"Model terbaik disimpan di {OUTPUT_DIR}/{MODEL_NAME}") \ No newline at end of file diff --git a/.history/src/train_20250927161606.py b/.history/src/train_20250927161606.py new file mode 100644 index 0000000000000000000000000000000000000000..6515a39e0e00cbb13711eb71434db70d584f90ae --- /dev/null +++ b/.history/src/train_20250927161606.py @@ -0,0 +1,178 @@ +# src/train.py (Versi Final dengan Balanced Sampler & Focal Loss) + +import torch +import torch.nn as nn +import torch.optim as optim +from tqdm import tqdm +import numpy as np +from sklearn.metrics import classification_report +from torch.utils.data import DataLoader, WeightedRandomSampler +from utils import save_model, save_plots, save_confusion_matrix, FocalLoss + +# Impor dari file lain dalam proyek +from model import RetinaClassifier +# from model import create_model +from dataset import get_dataloaders +from utils import save_model, save_plots, save_confusion_matrix, FocalLoss # Pastikan FocalLoss diimpor + +# --- 1. KONFIGURASI & HYPERPARAMETERS --- +DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu' +DATA_DIR = 'data' +OUTPUT_DIR = 'outputs/models' +IMAGE_SIZE = 224 +BATCH_SIZE = 8 +EPOCHS = 30 +LEARNING_RATE_HEAD = 1e-3 +LEARNING_RATE_FINETUNE = 3e-5 +WEIGHT_DECAY = 0.01 +MODEL_NAME = 'best_vit_model_balanced.pth' +NUM_CLASSES = 4 + +# --- 2. 
FUNGSI TRAINING & VALIDASI (Tidak ada perubahan di sini) --- +def train_one_epoch(model, dataloader, optimizer, criterion, device): + model.train() + running_loss = 0.0 + correct_predictions = 0 + total_samples = 0 + + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Training"): + images, labels = images.to(device), labels.to(device) + optimizer.zero_grad() + outputs = model(images) + loss = criterion(outputs, labels) + loss.backward() + optimizer.step() + + running_loss += loss.item() * images.size(0) + _, preds = torch.max(outputs, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + return epoch_loss, epoch_acc + +def validate_one_epoch(model, dataloader, criterion, device): + model.eval() + running_loss = 0.0 + correct_predictions = 0 + total_samples = 0 + all_preds, all_labels = [], [] + + with torch.no_grad(): + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Validating"): + images, labels = images.to(device), labels.to(device) + outputs = model(images) + loss = criterion(outputs, labels) + + running_loss += loss.item() * images.size(0) + _, preds = torch.max(outputs, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + all_preds.extend(preds.cpu().numpy()) + all_labels.extend(labels.cpu().numpy()) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + + print("\n--- Laporan Klasifikasi Validasi ---") + print(classification_report(all_labels, all_preds, target_names=[str(i) for i in range(NUM_CLASSES)], zero_division=0)) + + return epoch_loss, epoch_acc, all_labels, all_preds + +# --- 3. SCRIPT UTAMA --- +if __name__ == '__main__': + # A. 
Persiapan Data & Implementasi WeightedRandomSampler + # Panggil get_dataloaders untuk mendapatkan dataset latih + _, valid_loader, classes, train_dataset = get_dataloaders(DATA_DIR, BATCH_SIZE, IMAGE_SIZE) + + print("\n--- Menyiapkan Balanced Sampler untuk Training ---") + # Hitung bobot untuk setiap sampel (bukan per kelas) + class_counts = np.bincount(train_dataset.targets) + class_weights = 1. / torch.tensor(class_counts, dtype=torch.float) + sample_weights = class_weights[train_dataset.targets] + criterion = FocalLoss() + # Buat Sampler yang akan menyeimbangkan pengambilan data + sampler = WeightedRandomSampler(weights=sample_weights, num_samples=len(sample_weights), replacement=True) + + # Buat DataLoader training BARU yang menggunakan sampler + train_loader = DataLoader( + train_dataset, + batch_size=BATCH_SIZE, + sampler=sampler, + num_workers=4 # Sesuaikan dengan kemampuan CPU Anda + ) + print("Balanced Sampler berhasil dibuat. Setiap batch training sekarang akan seimbang.") + + # B. Arsitektur & Pretraining + model = RetinaClassifier(num_classes=len(classes)).to(DEVICE) + + # C. Menggunakan Focal Loss + # Ganti dari CrossEntropyLoss menjadi FocalLoss + criterion = FocalLoss(alpha=[1, 1, 1, 2], gamma=2).to(DEVICE) + print("Menggunakan Focal Loss untuk fokus pada sampel yang sulit.") + + # D. 
Strategi Fine-tune +# --- TAHAP 1: FREEZE BACKBONE, LATIH HEAD --- +# --- TAHAP 1: FREEZE BACKBONE, LATIH HEAD --- +print("\n--- TAHAP 1: Melatih Classifier Head ---") +for param in model.parameters(): + param.requires_grad = False + +# Ambil classifier layer dari backbone timm +classifier_module = model.backbone.get_classifier() +for param in classifier_module.parameters(): + param.requires_grad = True + +optimizer_head = optim.AdamW(classifier_module.parameters(), lr=LEARNING_RATE_HEAD, weight_decay=WEIGHT_DECAY) + +for epoch in range(5): + print(f"Epoch Head {epoch+1}/5") + train_one_epoch(model, train_loader, optimizer_head, criterion, DEVICE) + validate_one_epoch(model, valid_loader, criterion, DEVICE) + + + # --- TAHAP 2: UNFREEZE & FINE-TUNE SELURUH MODEL --- + print("\n--- TAHAP 2: Fine-tuning Seluruh Model ---") + for param in model.parameters(): + param.requires_grad = True + + optimizer_finetune = optim.AdamW(model.parameters(), lr=LEARNING_RATE_FINETUNE, weight_decay=WEIGHT_DECAY) + scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer_finetune, T_max=EPOCHS, eta_min=1e-7) + + history = {'train_loss': [], 'train_acc': [], 'valid_loss': [], 'valid_acc': []} + best_valid_acc = 0.0 + best_labels, best_preds = None, None + + for epoch in range(EPOCHS): + print(f"Epoch {epoch+1}/{EPOCHS}") + + train_loss, train_acc = train_one_epoch(model, train_loader, optimizer_finetune, criterion, DEVICE) + valid_loss, valid_acc, valid_labels, valid_preds = validate_one_epoch(model, valid_loader, criterion, DEVICE) + + scheduler.step() + + print(f" Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}") + print(f" Valid Loss: {valid_loss:.4f}, Valid Acc: {valid_acc:.4f}") + + history['train_loss'].append(train_loss) + history['train_acc'].append(train_acc) + history['valid_loss'].append(valid_loss) + history['valid_acc'].append(valid_acc) + + if valid_acc > best_valid_acc: + print(f"Validasi akurasi meningkat dari {best_valid_acc:.4f} ke {valid_acc:.4f}. 
Menyimpan model...") + save_model(epoch, model, optimizer_finetune, criterion, f"{OUTPUT_DIR}/{MODEL_NAME}") + best_valid_acc = valid_acc + best_labels = valid_labels + best_preds = valid_preds + + # E. Evaluasi & Reporting + save_plots(history['train_acc'], history['valid_acc'], history['train_loss'], history['valid_loss'], OUTPUT_DIR) + + if best_labels and best_preds: + save_confusion_matrix(best_labels, best_preds, classes, f"{OUTPUT_DIR}/confusion_matrix.png") + + print("\n--- Selesai ---") + print(f"Model terbaik disimpan di {OUTPUT_DIR}/{MODEL_NAME}") \ No newline at end of file diff --git a/.history/src/train_20250927161609.py b/.history/src/train_20250927161609.py new file mode 100644 index 0000000000000000000000000000000000000000..ec65baa239849ac4867afd86aeac5d8144f2fec6 --- /dev/null +++ b/.history/src/train_20250927161609.py @@ -0,0 +1,178 @@ +# src/train.py (Versi Final dengan Balanced Sampler & Focal Loss) + +import torch +import torch.nn as nn +import torch.optim as optim +from tqdm import tqdm +import numpy as np +from sklearn.metrics import classification_report +from torch.utils.data import DataLoader, WeightedRandomSampler +from utils import save_model, save_plots, save_confusion_matrix, FocalLoss + +# Impor dari file lain dalam proyek +from model import RetinaClassifier +# from model import create_model +from dataset import get_dataloaders +from utils import save_model, save_plots, save_confusion_matrix, FocalLoss # Pastikan FocalLoss diimpor + +# --- 1. KONFIGURASI & HYPERPARAMETERS --- +DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu' +DATA_DIR = 'data' +OUTPUT_DIR = 'outputs/models' +IMAGE_SIZE = 224 +BATCH_SIZE = 8 +EPOCHS = 30 +LEARNING_RATE_HEAD = 1e-3 +LEARNING_RATE_FINETUNE = 3e-5 +WEIGHT_DECAY = 0.01 +MODEL_NAME = 'best_vit_model_balanced.pth' +NUM_CLASSES = 4 + +# --- 2. 
FUNGSI TRAINING & VALIDASI (Tidak ada perubahan di sini) --- +def train_one_epoch(model, dataloader, optimizer, criterion, device): + model.train() + running_loss = 0.0 + correct_predictions = 0 + total_samples = 0 + + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Training"): + images, labels = images.to(device), labels.to(device) + optimizer.zero_grad() + outputs = model(images) + loss = criterion(outputs, labels) + loss.backward() + optimizer.step() + + running_loss += loss.item() * images.size(0) + _, preds = torch.max(outputs, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + return epoch_loss, epoch_acc + +def validate_one_epoch(model, dataloader, criterion, device): + model.eval() + running_loss = 0.0 + correct_predictions = 0 + total_samples = 0 + all_preds, all_labels = [], [] + + with torch.no_grad(): + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Validating"): + images, labels = images.to(device), labels.to(device) + outputs = model(images) + loss = criterion(outputs, labels) + + running_loss += loss.item() * images.size(0) + _, preds = torch.max(outputs, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + all_preds.extend(preds.cpu().numpy()) + all_labels.extend(labels.cpu().numpy()) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + + print("\n--- Laporan Klasifikasi Validasi ---") + print(classification_report(all_labels, all_preds, target_names=[str(i) for i in range(NUM_CLASSES)], zero_division=0)) + + return epoch_loss, epoch_acc, all_labels, all_preds + +# --- 3. SCRIPT UTAMA --- +if __name__ == '__main__': + # A. 
Persiapan Data & Implementasi WeightedRandomSampler + # Panggil get_dataloaders untuk mendapatkan dataset latih + _, valid_loader, classes, train_dataset = get_dataloaders(DATA_DIR, BATCH_SIZE, IMAGE_SIZE) + + print("\n--- Menyiapkan Balanced Sampler untuk Training ---") + # Hitung bobot untuk setiap sampel (bukan per kelas) + class_counts = np.bincount(train_dataset.targets) + class_weights = 1. / torch.tensor(class_counts, dtype=torch.float) + sample_weights = class_weights[train_dataset.targets] + criterion = FocalLoss() + # Buat Sampler yang akan menyeimbangkan pengambilan data + sampler = WeightedRandomSampler(weights=sample_weights, num_samples=len(sample_weights), replacement=True) + + # Buat DataLoader training BARU yang menggunakan sampler + train_loader = DataLoader( + train_dataset, + batch_size=BATCH_SIZE, + sampler=sampler, + num_workers=4 # Sesuaikan dengan kemampuan CPU Anda + ) + print("Balanced Sampler berhasil dibuat. Setiap batch training sekarang akan seimbang.") + + # B. Arsitektur & Pretraining + model = RetinaClassifier(num_classes=len(classes)).to(DEVICE) + + # C. Menggunakan Focal Loss + # Ganti dari CrossEntropyLoss menjadi FocalLoss + criterion = FocalLoss(alpha=[1, 1, 1, 2], gamma=2).to(DEVICE) + print("Menggunakan Focal Loss untuk fokus pada sampel yang sulit.") + + # D. 
Strategi Fine-tune +# --- TAHAP 1: FREEZE BACKBONE, LATIH HEAD --- +# --- TAHAP 1: FREEZE BACKBONE, LATIH HEAD --- +print("\n--- TAHAP 1: Melatih Classifier Head ---") +for param in model.parameters(): + param.requires_grad = False + +# Ambil classifier layer dari backbone timm + classifier_module = model.backbone.get_classifier() +for param in classifier_module.parameters(): + param.requires_grad = True + +optimizer_head = optim.AdamW(classifier_module.parameters(), lr=LEARNING_RATE_HEAD, weight_decay=WEIGHT_DECAY) + +for epoch in range(5): + print(f"Epoch Head {epoch+1}/5") + train_one_epoch(model, train_loader, optimizer_head, criterion, DEVICE) + validate_one_epoch(model, valid_loader, criterion, DEVICE) + + + # --- TAHAP 2: UNFREEZE & FINE-TUNE SELURUH MODEL --- + print("\n--- TAHAP 2: Fine-tuning Seluruh Model ---") + for param in model.parameters(): + param.requires_grad = True + + optimizer_finetune = optim.AdamW(model.parameters(), lr=LEARNING_RATE_FINETUNE, weight_decay=WEIGHT_DECAY) + scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer_finetune, T_max=EPOCHS, eta_min=1e-7) + + history = {'train_loss': [], 'train_acc': [], 'valid_loss': [], 'valid_acc': []} + best_valid_acc = 0.0 + best_labels, best_preds = None, None + + for epoch in range(EPOCHS): + print(f"Epoch {epoch+1}/{EPOCHS}") + + train_loss, train_acc = train_one_epoch(model, train_loader, optimizer_finetune, criterion, DEVICE) + valid_loss, valid_acc, valid_labels, valid_preds = validate_one_epoch(model, valid_loader, criterion, DEVICE) + + scheduler.step() + + print(f" Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}") + print(f" Valid Loss: {valid_loss:.4f}, Valid Acc: {valid_acc:.4f}") + + history['train_loss'].append(train_loss) + history['train_acc'].append(train_acc) + history['valid_loss'].append(valid_loss) + history['valid_acc'].append(valid_acc) + + if valid_acc > best_valid_acc: + print(f"Validasi akurasi meningkat dari {best_valid_acc:.4f} ke {valid_acc:.4f}. 
Menyimpan model...") + save_model(epoch, model, optimizer_finetune, criterion, f"{OUTPUT_DIR}/{MODEL_NAME}") + best_valid_acc = valid_acc + best_labels = valid_labels + best_preds = valid_preds + + # E. Evaluasi & Reporting + save_plots(history['train_acc'], history['valid_acc'], history['train_loss'], history['valid_loss'], OUTPUT_DIR) + + if best_labels and best_preds: + save_confusion_matrix(best_labels, best_preds, classes, f"{OUTPUT_DIR}/confusion_matrix.png") + + print("\n--- Selesai ---") + print(f"Model terbaik disimpan di {OUTPUT_DIR}/{MODEL_NAME}") \ No newline at end of file diff --git a/.history/src/train_20250927161611.py b/.history/src/train_20250927161611.py new file mode 100644 index 0000000000000000000000000000000000000000..107115fe1e5b590a1327d6f13e86cf1f14950241 --- /dev/null +++ b/.history/src/train_20250927161611.py @@ -0,0 +1,178 @@ +# src/train.py (Versi Final dengan Balanced Sampler & Focal Loss) + +import torch +import torch.nn as nn +import torch.optim as optim +from tqdm import tqdm +import numpy as np +from sklearn.metrics import classification_report +from torch.utils.data import DataLoader, WeightedRandomSampler +from utils import save_model, save_plots, save_confusion_matrix, FocalLoss + +# Impor dari file lain dalam proyek +from model import RetinaClassifier +# from model import create_model +from dataset import get_dataloaders +from utils import save_model, save_plots, save_confusion_matrix, FocalLoss # Pastikan FocalLoss diimpor + +# --- 1. KONFIGURASI & HYPERPARAMETERS --- +DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu' +DATA_DIR = 'data' +OUTPUT_DIR = 'outputs/models' +IMAGE_SIZE = 224 +BATCH_SIZE = 8 +EPOCHS = 30 +LEARNING_RATE_HEAD = 1e-3 +LEARNING_RATE_FINETUNE = 3e-5 +WEIGHT_DECAY = 0.01 +MODEL_NAME = 'best_vit_model_balanced.pth' +NUM_CLASSES = 4 + +# --- 2. 
FUNGSI TRAINING & VALIDASI (Tidak ada perubahan di sini) --- +def train_one_epoch(model, dataloader, optimizer, criterion, device): + model.train() + running_loss = 0.0 + correct_predictions = 0 + total_samples = 0 + + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Training"): + images, labels = images.to(device), labels.to(device) + optimizer.zero_grad() + outputs = model(images) + loss = criterion(outputs, labels) + loss.backward() + optimizer.step() + + running_loss += loss.item() * images.size(0) + _, preds = torch.max(outputs, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + return epoch_loss, epoch_acc + +def validate_one_epoch(model, dataloader, criterion, device): + model.eval() + running_loss = 0.0 + correct_predictions = 0 + total_samples = 0 + all_preds, all_labels = [], [] + + with torch.no_grad(): + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Validating"): + images, labels = images.to(device), labels.to(device) + outputs = model(images) + loss = criterion(outputs, labels) + + running_loss += loss.item() * images.size(0) + _, preds = torch.max(outputs, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + all_preds.extend(preds.cpu().numpy()) + all_labels.extend(labels.cpu().numpy()) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + + print("\n--- Laporan Klasifikasi Validasi ---") + print(classification_report(all_labels, all_preds, target_names=[str(i) for i in range(NUM_CLASSES)], zero_division=0)) + + return epoch_loss, epoch_acc, all_labels, all_preds + +# --- 3. SCRIPT UTAMA --- +if __name__ == '__main__': + # A. 
Persiapan Data & Implementasi WeightedRandomSampler + # Panggil get_dataloaders untuk mendapatkan dataset latih + _, valid_loader, classes, train_dataset = get_dataloaders(DATA_DIR, BATCH_SIZE, IMAGE_SIZE) + + print("\n--- Menyiapkan Balanced Sampler untuk Training ---") + # Hitung bobot untuk setiap sampel (bukan per kelas) + class_counts = np.bincount(train_dataset.targets) + class_weights = 1. / torch.tensor(class_counts, dtype=torch.float) + sample_weights = class_weights[train_dataset.targets] + criterion = FocalLoss() + # Buat Sampler yang akan menyeimbangkan pengambilan data + sampler = WeightedRandomSampler(weights=sample_weights, num_samples=len(sample_weights), replacement=True) + + # Buat DataLoader training BARU yang menggunakan sampler + train_loader = DataLoader( + train_dataset, + batch_size=BATCH_SIZE, + sampler=sampler, + num_workers=4 # Sesuaikan dengan kemampuan CPU Anda + ) + print("Balanced Sampler berhasil dibuat. Setiap batch training sekarang akan seimbang.") + + # B. Arsitektur & Pretraining + model = RetinaClassifier(num_classes=len(classes)).to(DEVICE) + + # C. Menggunakan Focal Loss + # Ganti dari CrossEntropyLoss menjadi FocalLoss + criterion = FocalLoss(alpha=[1, 1, 1, 2], gamma=2).to(DEVICE) + print("Menggunakan Focal Loss untuk fokus pada sampel yang sulit.") + + # D. 
Strategi Fine-tune +# --- TAHAP 1: FREEZE BACKBONE, LATIH HEAD --- +# --- TAHAP 1: FREEZE BACKBONE, LATIH HEAD --- +print("\n--- TAHAP 1: Melatih Classifier Head ---") +for param in model.parameters(): + param.requires_grad = False + +# Ambil classifier layer dari backbone timm + classifier_module = model.backbone.get_classifier() +for param in classifier_module.parameters(): + param.requires_grad = True + + optimizer_head = optim.AdamW(classifier_module.parameters(), lr=LEARNING_RATE_HEAD, weight_decay=WEIGHT_DECAY) + +for epoch in range(5): + print(f"Epoch Head {epoch+1}/5") + train_one_epoch(model, train_loader, optimizer_head, criterion, DEVICE) + validate_one_epoch(model, valid_loader, criterion, DEVICE) + + + # --- TAHAP 2: UNFREEZE & FINE-TUNE SELURUH MODEL --- + print("\n--- TAHAP 2: Fine-tuning Seluruh Model ---") + for param in model.parameters(): + param.requires_grad = True + + optimizer_finetune = optim.AdamW(model.parameters(), lr=LEARNING_RATE_FINETUNE, weight_decay=WEIGHT_DECAY) + scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer_finetune, T_max=EPOCHS, eta_min=1e-7) + + history = {'train_loss': [], 'train_acc': [], 'valid_loss': [], 'valid_acc': []} + best_valid_acc = 0.0 + best_labels, best_preds = None, None + + for epoch in range(EPOCHS): + print(f"Epoch {epoch+1}/{EPOCHS}") + + train_loss, train_acc = train_one_epoch(model, train_loader, optimizer_finetune, criterion, DEVICE) + valid_loss, valid_acc, valid_labels, valid_preds = validate_one_epoch(model, valid_loader, criterion, DEVICE) + + scheduler.step() + + print(f" Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}") + print(f" Valid Loss: {valid_loss:.4f}, Valid Acc: {valid_acc:.4f}") + + history['train_loss'].append(train_loss) + history['train_acc'].append(train_acc) + history['valid_loss'].append(valid_loss) + history['valid_acc'].append(valid_acc) + + if valid_acc > best_valid_acc: + print(f"Validasi akurasi meningkat dari {best_valid_acc:.4f} ke {valid_acc:.4f}. 
Menyimpan model...") + save_model(epoch, model, optimizer_finetune, criterion, f"{OUTPUT_DIR}/{MODEL_NAME}") + best_valid_acc = valid_acc + best_labels = valid_labels + best_preds = valid_preds + + # E. Evaluasi & Reporting + save_plots(history['train_acc'], history['valid_acc'], history['train_loss'], history['valid_loss'], OUTPUT_DIR) + + if best_labels and best_preds: + save_confusion_matrix(best_labels, best_preds, classes, f"{OUTPUT_DIR}/confusion_matrix.png") + + print("\n--- Selesai ---") + print(f"Model terbaik disimpan di {OUTPUT_DIR}/{MODEL_NAME}") \ No newline at end of file diff --git a/.history/src/train_20250927161615.py b/.history/src/train_20250927161615.py new file mode 100644 index 0000000000000000000000000000000000000000..75da6da4fc5d7fca564114e1defb4b868076e6d4 --- /dev/null +++ b/.history/src/train_20250927161615.py @@ -0,0 +1,177 @@ +# src/train.py (Versi Final dengan Balanced Sampler & Focal Loss) + +import torch +import torch.nn as nn +import torch.optim as optim +from tqdm import tqdm +import numpy as np +from sklearn.metrics import classification_report +from torch.utils.data import DataLoader, WeightedRandomSampler +from utils import save_model, save_plots, save_confusion_matrix, FocalLoss + +# Impor dari file lain dalam proyek +from model import RetinaClassifier +# from model import create_model +from dataset import get_dataloaders +from utils import save_model, save_plots, save_confusion_matrix, FocalLoss # Pastikan FocalLoss diimpor + +# --- 1. KONFIGURASI & HYPERPARAMETERS --- +DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu' +DATA_DIR = 'data' +OUTPUT_DIR = 'outputs/models' +IMAGE_SIZE = 224 +BATCH_SIZE = 8 +EPOCHS = 30 +LEARNING_RATE_HEAD = 1e-3 +LEARNING_RATE_FINETUNE = 3e-5 +WEIGHT_DECAY = 0.01 +MODEL_NAME = 'best_vit_model_balanced.pth' +NUM_CLASSES = 4 + +# --- 2. 
FUNGSI TRAINING & VALIDASI (Tidak ada perubahan di sini) --- +def train_one_epoch(model, dataloader, optimizer, criterion, device): + model.train() + running_loss = 0.0 + correct_predictions = 0 + total_samples = 0 + + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Training"): + images, labels = images.to(device), labels.to(device) + optimizer.zero_grad() + outputs = model(images) + loss = criterion(outputs, labels) + loss.backward() + optimizer.step() + + running_loss += loss.item() * images.size(0) + _, preds = torch.max(outputs, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + return epoch_loss, epoch_acc + +def validate_one_epoch(model, dataloader, criterion, device): + model.eval() + running_loss = 0.0 + correct_predictions = 0 + total_samples = 0 + all_preds, all_labels = [], [] + + with torch.no_grad(): + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Validating"): + images, labels = images.to(device), labels.to(device) + outputs = model(images) + loss = criterion(outputs, labels) + + running_loss += loss.item() * images.size(0) + _, preds = torch.max(outputs, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + all_preds.extend(preds.cpu().numpy()) + all_labels.extend(labels.cpu().numpy()) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + + print("\n--- Laporan Klasifikasi Validasi ---") + print(classification_report(all_labels, all_preds, target_names=[str(i) for i in range(NUM_CLASSES)], zero_division=0)) + + return epoch_loss, epoch_acc, all_labels, all_preds + +# --- 3. SCRIPT UTAMA --- +if __name__ == '__main__': + # A. 
Persiapan Data & Implementasi WeightedRandomSampler + # Panggil get_dataloaders untuk mendapatkan dataset latih + _, valid_loader, classes, train_dataset = get_dataloaders(DATA_DIR, BATCH_SIZE, IMAGE_SIZE) + + print("\n--- Menyiapkan Balanced Sampler untuk Training ---") + # Hitung bobot untuk setiap sampel (bukan per kelas) + class_counts = np.bincount(train_dataset.targets) + class_weights = 1. / torch.tensor(class_counts, dtype=torch.float) + sample_weights = class_weights[train_dataset.targets] + criterion = FocalLoss() + # Buat Sampler yang akan menyeimbangkan pengambilan data + sampler = WeightedRandomSampler(weights=sample_weights, num_samples=len(sample_weights), replacement=True) + + # Buat DataLoader training BARU yang menggunakan sampler + train_loader = DataLoader( + train_dataset, + batch_size=BATCH_SIZE, + sampler=sampler, + num_workers=4 # Sesuaikan dengan kemampuan CPU Anda + ) + print("Balanced Sampler berhasil dibuat. Setiap batch training sekarang akan seimbang.") + + # B. Arsitektur & Pretraining + model = RetinaClassifier(num_classes=len(classes)).to(DEVICE) + + # C. Menggunakan Focal Loss + # Ganti dari CrossEntropyLoss menjadi FocalLoss + criterion = FocalLoss(alpha=[1, 1, 1, 2], gamma=2).to(DEVICE) + print("Menggunakan Focal Loss untuk fokus pada sampel yang sulit.") + + # D. 
Strategi Fine-tune +# --- TAHAP 1: FREEZE BACKBONE, LATIH HEAD --- +print("\n--- TAHAP 1: Melatih Classifier Head ---") +for param in model.parameters(): + param.requires_grad = False + +# Ambil classifier layer dari backbone timm + classifier_module = model.backbone.get_classifier() +for param in classifier_module.parameters(): + param.requires_grad = True + + optimizer_head = optim.AdamW(classifier_module.parameters(), lr=LEARNING_RATE_HEAD, weight_decay=WEIGHT_DECAY) + +for epoch in range(5): + print(f"Epoch Head {epoch+1}/5") + train_one_epoch(model, train_loader, optimizer_head, criterion, DEVICE) + validate_one_epoch(model, valid_loader, criterion, DEVICE) + + + # --- TAHAP 2: UNFREEZE & FINE-TUNE SELURUH MODEL --- + print("\n--- TAHAP 2: Fine-tuning Seluruh Model ---") + for param in model.parameters(): + param.requires_grad = True + + optimizer_finetune = optim.AdamW(model.parameters(), lr=LEARNING_RATE_FINETUNE, weight_decay=WEIGHT_DECAY) + scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer_finetune, T_max=EPOCHS, eta_min=1e-7) + + history = {'train_loss': [], 'train_acc': [], 'valid_loss': [], 'valid_acc': []} + best_valid_acc = 0.0 + best_labels, best_preds = None, None + + for epoch in range(EPOCHS): + print(f"Epoch {epoch+1}/{EPOCHS}") + + train_loss, train_acc = train_one_epoch(model, train_loader, optimizer_finetune, criterion, DEVICE) + valid_loss, valid_acc, valid_labels, valid_preds = validate_one_epoch(model, valid_loader, criterion, DEVICE) + + scheduler.step() + + print(f" Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}") + print(f" Valid Loss: {valid_loss:.4f}, Valid Acc: {valid_acc:.4f}") + + history['train_loss'].append(train_loss) + history['train_acc'].append(train_acc) + history['valid_loss'].append(valid_loss) + history['valid_acc'].append(valid_acc) + + if valid_acc > best_valid_acc: + print(f"Validasi akurasi meningkat dari {best_valid_acc:.4f} ke {valid_acc:.4f}. 
Menyimpan model...") + save_model(epoch, model, optimizer_finetune, criterion, f"{OUTPUT_DIR}/{MODEL_NAME}") + best_valid_acc = valid_acc + best_labels = valid_labels + best_preds = valid_preds + + # E. Evaluasi & Reporting + save_plots(history['train_acc'], history['valid_acc'], history['train_loss'], history['valid_loss'], OUTPUT_DIR) + + if best_labels and best_preds: + save_confusion_matrix(best_labels, best_preds, classes, f"{OUTPUT_DIR}/confusion_matrix.png") + + print("\n--- Selesai ---") + print(f"Model terbaik disimpan di {OUTPUT_DIR}/{MODEL_NAME}") \ No newline at end of file diff --git a/.history/src/train_20250927161957.py b/.history/src/train_20250927161957.py new file mode 100644 index 0000000000000000000000000000000000000000..790be329a62a5745bb4c6b8e4f96c02f0dd0b734 --- /dev/null +++ b/.history/src/train_20250927161957.py @@ -0,0 +1,177 @@ +# src/train.py (Versi Final dengan Balanced Sampler & Focal Loss) + +import torch +import torch.nn as nn +import torch.optim as optim +from tqdm import tqdm +import numpy as np +from sklearn.metrics import classification_report +from torch.utils.data import DataLoader, WeightedRandomSampler +from utils import save_model, save_plots, save_confusion_matrix, FocalLoss + +# Impor dari file lain dalam proyek +from model import RetinaClassifier +# from model import create_model +from dataset import get_dataloaders +from utils import save_model, save_plots, save_confusion_matrix, FocalLoss # Pastikan FocalLoss diimpor + +# --- 1. KONFIGURASI & HYPERPARAMETERS --- +DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu' +DATA_DIR = 'data' +OUTPUT_DIR = 'outputs/models' +IMAGE_SIZE = 224 +BATCH_SIZE = 8 +EPOCHS = 30 +LEARNING_RATE_HEAD = 1e-3 +LEARNING_RATE_FINETUNE = 3e-5 +WEIGHT_DECAY = 0.01 +MODEL_NAME = 'best_vit_model_balanced.pth' +NUM_CLASSES = 4 + +# --- 2. 
FUNGSI TRAINING & VALIDASI (Tidak ada perubahan di sini) --- +def train_one_epoch(model, dataloader, optimizer, criterion, device): + model.train() + running_loss = 0.0 + correct_predictions = 0 + total_samples = 0 + + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Training"): + images, labels = images.to(device), labels.to(device) + optimizer.zero_grad() + outputs = model(images) + loss = criterion(outputs, labels) + loss.backward() + optimizer.step() + + running_loss += loss.item() * images.size(0) + _, preds = torch.max(outputs, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + return epoch_loss, epoch_acc + +def validate_one_epoch(model, dataloader, criterion, device): + model.eval() + running_loss = 0.0 + correct_predictions = 0 + total_samples = 0 + all_preds, all_labels = [], [] + + with torch.no_grad(): + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Validating"): + images, labels = images.to(device), labels.to(device) + outputs = model(images) + loss = criterion(outputs, labels) + + running_loss += loss.item() * images.size(0) + _, preds = torch.max(outputs, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + all_preds.extend(preds.cpu().numpy()) + all_labels.extend(labels.cpu().numpy()) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + + print("\n--- Laporan Klasifikasi Validasi ---") + print(classification_report(all_labels, all_preds, target_names=[str(i) for i in range(NUM_CLASSES)], zero_division=0)) + + return epoch_loss, epoch_acc, all_labels, all_preds + +# --- 3. SCRIPT UTAMA --- +if __name__ == '__main__': + # A. 
Persiapan Data & Implementasi WeightedRandomSampler + # Panggil get_dataloaders untuk mendapatkan dataset latih + _, valid_loader, classes, train_dataset = get_dataloaders(DATA_DIR, BATCH_SIZE, IMAGE_SIZE) + + print("\n--- Menyiapkan Balanced Sampler untuk Training ---") + # Hitung bobot untuk setiap sampel (bukan per kelas) + class_counts = np.bincount(train_dataset.targets) + class_weights = 1. / torch.tensor(class_counts, dtype=torch.float) + sample_weights = class_weights[train_dataset.targets] + criterion = FocalLoss() + # Buat Sampler yang akan menyeimbangkan pengambilan data + sampler = WeightedRandomSampler(weights=sample_weights, num_samples=len(sample_weights), replacement=True) + + # Buat DataLoader training BARU yang menggunakan sampler + train_loader = DataLoader( + train_dataset, + batch_size=BATCH_SIZE, + sampler=sampler, + num_workers=0 # Sesuaikan dengan kemampuan CPU Anda + ) + print("Balanced Sampler berhasil dibuat. Setiap batch training sekarang akan seimbang.") + + # B. Arsitektur & Pretraining + model = RetinaClassifier(num_classes=len(classes)).to(DEVICE) + + # C. Menggunakan Focal Loss + # Ganti dari CrossEntropyLoss menjadi FocalLoss + criterion = FocalLoss(alpha=[1, 1, 1, 2], gamma=2).to(DEVICE) + print("Menggunakan Focal Loss untuk fokus pada sampel yang sulit.") + + # D. 
Strategi Fine-tune +# --- TAHAP 1: FREEZE BACKBONE, LATIH HEAD --- +print("\n--- TAHAP 1: Melatih Classifier Head ---") +for param in model.parameters(): + param.requires_grad = False + +# Ambil classifier layer dari backbone timm + classifier_module = model.backbone.get_classifier() +for param in classifier_module.parameters(): + param.requires_grad = True + + optimizer_head = optim.AdamW(classifier_module.parameters(), lr=LEARNING_RATE_HEAD, weight_decay=WEIGHT_DECAY) + +for epoch in range(5): + print(f"Epoch Head {epoch+1}/5") + train_one_epoch(model, train_loader, optimizer_head, criterion, DEVICE) + validate_one_epoch(model, valid_loader, criterion, DEVICE) + + + # --- TAHAP 2: UNFREEZE & FINE-TUNE SELURUH MODEL --- + print("\n--- TAHAP 2: Fine-tuning Seluruh Model ---") + for param in model.parameters(): + param.requires_grad = True + + optimizer_finetune = optim.AdamW(model.parameters(), lr=LEARNING_RATE_FINETUNE, weight_decay=WEIGHT_DECAY) + scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer_finetune, T_max=EPOCHS, eta_min=1e-7) + + history = {'train_loss': [], 'train_acc': [], 'valid_loss': [], 'valid_acc': []} + best_valid_acc = 0.0 + best_labels, best_preds = None, None + + for epoch in range(EPOCHS): + print(f"Epoch {epoch+1}/{EPOCHS}") + + train_loss, train_acc = train_one_epoch(model, train_loader, optimizer_finetune, criterion, DEVICE) + valid_loss, valid_acc, valid_labels, valid_preds = validate_one_epoch(model, valid_loader, criterion, DEVICE) + + scheduler.step() + + print(f" Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}") + print(f" Valid Loss: {valid_loss:.4f}, Valid Acc: {valid_acc:.4f}") + + history['train_loss'].append(train_loss) + history['train_acc'].append(train_acc) + history['valid_loss'].append(valid_loss) + history['valid_acc'].append(valid_acc) + + if valid_acc > best_valid_acc: + print(f"Validasi akurasi meningkat dari {best_valid_acc:.4f} ke {valid_acc:.4f}. 
Menyimpan model...") + save_model(epoch, model, optimizer_finetune, criterion, f"{OUTPUT_DIR}/{MODEL_NAME}") + best_valid_acc = valid_acc + best_labels = valid_labels + best_preds = valid_preds + + # E. Evaluasi & Reporting + save_plots(history['train_acc'], history['valid_acc'], history['train_loss'], history['valid_loss'], OUTPUT_DIR) + + if best_labels and best_preds: + save_confusion_matrix(best_labels, best_preds, classes, f"{OUTPUT_DIR}/confusion_matrix.png") + + print("\n--- Selesai ---") + print(f"Model terbaik disimpan di {OUTPUT_DIR}/{MODEL_NAME}") \ No newline at end of file diff --git a/.history/src/train_20250927162432.py b/.history/src/train_20250927162432.py new file mode 100644 index 0000000000000000000000000000000000000000..a5d5dc7f875851640ebb8a5adb4215be3ce900ec --- /dev/null +++ b/.history/src/train_20250927162432.py @@ -0,0 +1,178 @@ +# src/train.py (Versi Final dengan Balanced Sampler & Focal Loss) + +import torch +import torch.nn as nn +import torch.optim as optim +from tqdm import tqdm +import numpy as np +from sklearn.metrics import classification_report +from torch.utils.data import DataLoader, WeightedRandomSampler +from utils import save_model, save_plots, save_confusion_matrix, FocalLoss + +# Impor dari file lain dalam proyek +from model import RetinaClassifier +# from model import create_model +from dataset import get_dataloaders +from utils import save_model, save_plots, save_confusion_matrix, FocalLoss # Pastikan FocalLoss diimpor + +# --- 1. KONFIGURASI & HYPERPARAMETERS --- +DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu' +DATA_DIR = 'data' +OUTPUT_DIR = 'outputs/models' +IMAGE_SIZE = 224 +BATCH_SIZE = 8 +EPOCHS = 30 +LEARNING_RATE_HEAD = 1e-3 +LEARNING_RATE_FINETUNE = 3e-5 +WEIGHT_DECAY = 0.01 +MODEL_NAME = 'best_vit_model_balanced.pth' +NUM_CLASSES = 4 + +# --- 2. 
def train_one_epoch(model, dataloader, optimizer, criterion, device):
    """Run one optimisation pass over ``dataloader``.

    Args:
        model: Network to train; put into training mode here.
        dataloader: Sized iterable yielding ``(images, labels)`` batches.
        optimizer: Optimizer stepped once per batch.
        criterion: Loss callable invoked as ``criterion(outputs, labels)``.
        device: Device each batch is moved to before the forward pass.

    Returns:
        ``(epoch_loss, epoch_acc)`` as Python floats: the per-sample mean
        loss and the fraction of correct predictions. ``(0.0, 0.0)`` when
        the dataloader yields no samples.
    """
    model.train()
    running_loss = 0.0
    correct_predictions = 0
    total_samples = 0

    for images, labels in tqdm(dataloader, total=len(dataloader), desc="Training"):
        images, labels = images.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        batch_size = labels.size(0)
        # Weight the batch loss by its size so the epoch value is a per-sample
        # mean (assumes the criterion reduces to a batch mean - TODO confirm).
        running_loss += loss.item() * batch_size
        _, preds = torch.max(outputs, 1)
        # Accumulate a plain int instead of a device tensor.
        correct_predictions += (preds == labels).sum().item()
        total_samples += batch_size

    if total_samples == 0:
        # Empty loader: avoid dividing by zero.
        return 0.0, 0.0
    return running_loss / total_samples, correct_predictions / total_samples


def validate_one_epoch(model, dataloader, criterion, device):
    """Evaluate ``model`` on ``dataloader`` without gradient tracking.

    Prints a scikit-learn classification report covering class ids
    ``0 .. NUM_CLASSES - 1``.

    Args:
        model: Network to evaluate; put into eval mode here.
        dataloader: Sized iterable yielding ``(images, labels)`` batches.
        criterion: Loss callable invoked as ``criterion(outputs, labels)``.
        device: Device each batch is moved to before the forward pass.

    Returns:
        ``(epoch_loss, epoch_acc, all_labels, all_preds)``; the last two are
        flat lists of integer class ids in iteration order. For an empty
        loader the result is ``(0.0, 0.0, [], [])`` and no report is printed.
    """
    model.eval()
    running_loss = 0.0
    correct_predictions = 0
    total_samples = 0
    all_preds, all_labels = [], []

    with torch.no_grad():
        for images, labels in tqdm(dataloader, total=len(dataloader), desc="Validating"):
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            loss = criterion(outputs, labels)

            batch_size = labels.size(0)
            running_loss += loss.item() * batch_size
            _, preds = torch.max(outputs, 1)
            correct_predictions += (preds == labels).sum().item()
            total_samples += batch_size

            all_preds.extend(preds.cpu().tolist())
            all_labels.extend(labels.cpu().tolist())

    if total_samples == 0:
        return 0.0, 0.0, all_labels, all_preds

    epoch_loss = running_loss / total_samples
    epoch_acc = correct_predictions / total_samples

    # Pass labels= explicitly: otherwise the report infers the class set from
    # the data and raises ValueError when fewer than NUM_CLASSES classes occur
    # (the target_names length would no longer match).
    class_ids = list(range(NUM_CLASSES))
    print("\n--- Laporan Klasifikasi Validasi ---")
    print(classification_report(
        all_labels,
        all_preds,
        labels=class_ids,
        target_names=[str(i) for i in class_ids],
        zero_division=0,
    ))

    return epoch_loss, epoch_acc, all_labels, all_preds
if __name__ == '__main__':
    # Every stage lives under the __main__ guard and runs exactly once, in
    # order; importing this module never starts a training run.

    # A. Data preparation & WeightedRandomSampler
    # Only the validation loader, class list and raw training dataset are
    # used; the training loader is rebuilt below around a balanced sampler.
    _, valid_loader, classes, train_dataset = get_dataloaders(DATA_DIR, BATCH_SIZE, IMAGE_SIZE)

    print("\n--- Menyiapkan Balanced Sampler untuk Training ---")
    # Per-sample weight = inverse frequency of that sample's class.
    class_counts = np.bincount(train_dataset.targets)
    class_weights = 1. / torch.tensor(class_counts, dtype=torch.float)
    sample_weights = class_weights[train_dataset.targets]

    # Draw with replacement so rare-class samples can repeat within an epoch.
    sampler = WeightedRandomSampler(weights=sample_weights, num_samples=len(sample_weights), replacement=True)

    # Training DataLoader driven by the balanced sampler.
    train_loader = DataLoader(
        train_dataset,
        batch_size=BATCH_SIZE,
        sampler=sampler,
        num_workers=0,  # adjust to the available CPU capacity
    )
    print("Balanced Sampler berhasil dibuat. Setiap batch training sekarang akan seimbang.")

    # B. Architecture & pretraining
    model = RetinaClassifier(num_classes=len(classes)).to(DEVICE)

    # C. Focal loss (used instead of CrossEntropyLoss); built once here.
    criterion = FocalLoss(alpha=[1, 1, 1, 2], gamma=2).to(DEVICE)
    print("Menggunakan Focal Loss untuk fokus pada sampel yang sulit.")

    # D. Fine-tuning strategy
    # --- STAGE 1: freeze everything, train only the classifier head ---
    print("\n--- TAHAP 1: Melatih Classifier Head ---")
    for param in model.parameters():
        param.requires_grad = False

    # Classifier layer exposed by the backbone.
    classifier_module = model.backbone.get_classifier()
    for param in classifier_module.parameters():
        param.requires_grad = True

    optimizer_head = optim.AdamW(classifier_module.parameters(), lr=LEARNING_RATE_HEAD, weight_decay=WEIGHT_DECAY)

    for epoch in range(5):
        print(f"Epoch Head {epoch+1}/5")
        train_one_epoch(model, train_loader, optimizer_head, criterion, DEVICE)
        validate_one_epoch(model, valid_loader, criterion, DEVICE)

    # --- STAGE 2: unfreeze and fine-tune the whole model ---
    print("\n--- TAHAP 2: Fine-tuning Seluruh Model ---")
    for param in model.parameters():
        param.requires_grad = True

    optimizer_finetune = optim.AdamW(model.parameters(), lr=LEARNING_RATE_FINETUNE, weight_decay=WEIGHT_DECAY)
    scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer_finetune, T_max=EPOCHS, eta_min=1e-7)

    history = {'train_loss': [], 'train_acc': [], 'valid_loss': [], 'valid_acc': []}
    best_valid_acc = 0.0
    best_labels, best_preds = None, None

    for epoch in range(EPOCHS):
        print(f"Epoch {epoch+1}/{EPOCHS}")

        train_loss, train_acc = train_one_epoch(model, train_loader, optimizer_finetune, criterion, DEVICE)
        valid_loss, valid_acc, valid_labels, valid_preds = validate_one_epoch(model, valid_loader, criterion, DEVICE)

        # The cosine schedule advances once per epoch.
        scheduler.step()

        print(f" Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}")
        print(f" Valid Loss: {valid_loss:.4f}, Valid Acc: {valid_acc:.4f}")

        history['train_loss'].append(train_loss)
        history['train_acc'].append(train_acc)
        history['valid_loss'].append(valid_loss)
        history['valid_acc'].append(valid_acc)

        # Checkpoint only on a strict improvement and remember that epoch's
        # labels/predictions for the confusion matrix.
        if valid_acc > best_valid_acc:
            print(f"Validasi akurasi meningkat dari {best_valid_acc:.4f} ke {valid_acc:.4f}. Menyimpan model...")
            save_model(epoch, model, optimizer_finetune, criterion, f"{OUTPUT_DIR}/{MODEL_NAME}")
            best_valid_acc = valid_acc
            best_labels = valid_labels
            best_preds = valid_preds

    # E. Evaluation & reporting
    save_plots(history['train_acc'], history['valid_acc'], history['train_loss'], history['valid_loss'], OUTPUT_DIR)

    if best_labels and best_preds:
        save_confusion_matrix(best_labels, best_preds, classes, f"{OUTPUT_DIR}/confusion_matrix.png")

    print("\n--- Selesai ---")
    print(f"Model terbaik disimpan di {OUTPUT_DIR}/{MODEL_NAME}")
def train_one_epoch(model, dataloader, optimizer, criterion, device):
    """Run one optimisation pass over ``dataloader``.

    Args:
        model: Network to train; put into training mode here.
        dataloader: Sized iterable yielding ``(images, labels)`` batches.
        optimizer: Optimizer stepped once per batch.
        criterion: Loss callable invoked as ``criterion(outputs, labels)``.
        device: Device each batch is moved to before the forward pass.

    Returns:
        ``(epoch_loss, epoch_acc)`` as Python floats: the per-sample mean
        loss and the fraction of correct predictions. ``(0.0, 0.0)`` when
        the dataloader yields no samples.
    """
    model.train()
    running_loss = 0.0
    correct_predictions = 0
    total_samples = 0

    for images, labels in tqdm(dataloader, total=len(dataloader), desc="Training"):
        images, labels = images.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        batch_size = labels.size(0)
        # Weight the batch loss by its size so the epoch value is a per-sample
        # mean (assumes the criterion reduces to a batch mean - TODO confirm).
        running_loss += loss.item() * batch_size
        _, preds = torch.max(outputs, 1)
        # Accumulate a plain int instead of a device tensor.
        correct_predictions += (preds == labels).sum().item()
        total_samples += batch_size

    if total_samples == 0:
        # Empty loader: avoid dividing by zero.
        return 0.0, 0.0
    return running_loss / total_samples, correct_predictions / total_samples


def validate_one_epoch(model, dataloader, criterion, device):
    """Evaluate ``model`` on ``dataloader`` without gradient tracking.

    Prints a scikit-learn classification report covering class ids
    ``0 .. NUM_CLASSES - 1``.

    Args:
        model: Network to evaluate; put into eval mode here.
        dataloader: Sized iterable yielding ``(images, labels)`` batches.
        criterion: Loss callable invoked as ``criterion(outputs, labels)``.
        device: Device each batch is moved to before the forward pass.

    Returns:
        ``(epoch_loss, epoch_acc, all_labels, all_preds)``; the last two are
        flat lists of integer class ids in iteration order. For an empty
        loader the result is ``(0.0, 0.0, [], [])`` and no report is printed.
    """
    model.eval()
    running_loss = 0.0
    correct_predictions = 0
    total_samples = 0
    all_preds, all_labels = [], []

    with torch.no_grad():
        for images, labels in tqdm(dataloader, total=len(dataloader), desc="Validating"):
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            loss = criterion(outputs, labels)

            batch_size = labels.size(0)
            running_loss += loss.item() * batch_size
            _, preds = torch.max(outputs, 1)
            correct_predictions += (preds == labels).sum().item()
            total_samples += batch_size

            all_preds.extend(preds.cpu().tolist())
            all_labels.extend(labels.cpu().tolist())

    if total_samples == 0:
        return 0.0, 0.0, all_labels, all_preds

    epoch_loss = running_loss / total_samples
    epoch_acc = correct_predictions / total_samples

    # Pass labels= explicitly: otherwise the report infers the class set from
    # the data and raises ValueError when fewer than NUM_CLASSES classes occur
    # (the target_names length would no longer match).
    class_ids = list(range(NUM_CLASSES))
    print("\n--- Laporan Klasifikasi Validasi ---")
    print(classification_report(
        all_labels,
        all_preds,
        labels=class_ids,
        target_names=[str(i) for i in class_ids],
        zero_division=0,
    ))

    return epoch_loss, epoch_acc, all_labels, all_preds
if __name__ == '__main__':
    # Every stage lives under the __main__ guard and runs exactly once, in
    # order; importing this module never starts a training run.

    # A. Data preparation & WeightedRandomSampler
    # Only the validation loader, class list and raw training dataset are
    # used; the training loader is rebuilt below around a balanced sampler.
    _, valid_loader, classes, train_dataset = get_dataloaders(DATA_DIR, BATCH_SIZE, IMAGE_SIZE)

    print("\n--- Menyiapkan Balanced Sampler untuk Training ---")
    # Per-sample weight = inverse frequency of that sample's class.
    class_counts = np.bincount(train_dataset.targets)
    class_weights = 1. / torch.tensor(class_counts, dtype=torch.float)
    sample_weights = class_weights[train_dataset.targets]

    # Draw with replacement so rare-class samples can repeat within an epoch.
    sampler = WeightedRandomSampler(weights=sample_weights, num_samples=len(sample_weights), replacement=True)

    # Training DataLoader driven by the balanced sampler.
    train_loader = DataLoader(
        train_dataset,
        batch_size=BATCH_SIZE,
        sampler=sampler,
        num_workers=0,  # adjust to the available CPU capacity
    )
    print("Balanced Sampler berhasil dibuat. Setiap batch training sekarang akan seimbang.")

    # B. Architecture & pretraining
    model = RetinaClassifier(num_classes=len(classes)).to(DEVICE)

    # C. Focal loss (used instead of CrossEntropyLoss); built once here.
    criterion = FocalLoss(alpha=[1, 1, 1, 2], gamma=2).to(DEVICE)
    print("Menggunakan Focal Loss untuk fokus pada sampel yang sulit.")

    # D. Fine-tuning strategy
    # --- STAGE 1: freeze everything, train only the classifier head ---
    print("\n--- TAHAP 1: Melatih Classifier Head ---")
    for param in model.parameters():
        param.requires_grad = False

    # Classifier layer exposed by the backbone.
    classifier_module = model.backbone.get_classifier()
    for param in classifier_module.parameters():
        param.requires_grad = True

    optimizer_head = optim.AdamW(classifier_module.parameters(), lr=LEARNING_RATE_HEAD, weight_decay=WEIGHT_DECAY)

    for epoch in range(5):
        print(f"Epoch Head {epoch+1}/5")
        train_one_epoch(model, train_loader, optimizer_head, criterion, DEVICE)
        validate_one_epoch(model, valid_loader, criterion, DEVICE)

    # --- STAGE 2: unfreeze and fine-tune the whole model ---
    print("\n--- TAHAP 2: Fine-tuning Seluruh Model ---")
    for param in model.parameters():
        param.requires_grad = True

    optimizer_finetune = optim.AdamW(model.parameters(), lr=LEARNING_RATE_FINETUNE, weight_decay=WEIGHT_DECAY)
    scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer_finetune, T_max=EPOCHS, eta_min=1e-7)

    history = {'train_loss': [], 'train_acc': [], 'valid_loss': [], 'valid_acc': []}
    best_valid_acc = 0.0
    best_labels, best_preds = None, None

    for epoch in range(EPOCHS):
        print(f"Epoch {epoch+1}/{EPOCHS}")

        train_loss, train_acc = train_one_epoch(model, train_loader, optimizer_finetune, criterion, DEVICE)
        valid_loss, valid_acc, valid_labels, valid_preds = validate_one_epoch(model, valid_loader, criterion, DEVICE)

        # The cosine schedule advances once per epoch.
        scheduler.step()

        print(f" Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}")
        print(f" Valid Loss: {valid_loss:.4f}, Valid Acc: {valid_acc:.4f}")

        history['train_loss'].append(train_loss)
        history['train_acc'].append(train_acc)
        history['valid_loss'].append(valid_loss)
        history['valid_acc'].append(valid_acc)

        # Checkpoint only on a strict improvement and remember that epoch's
        # labels/predictions for the confusion matrix.
        if valid_acc > best_valid_acc:
            print(f"Validasi akurasi meningkat dari {best_valid_acc:.4f} ke {valid_acc:.4f}. Menyimpan model...")
            save_model(epoch, model, optimizer_finetune, criterion, f"{OUTPUT_DIR}/{MODEL_NAME}")
            best_valid_acc = valid_acc
            best_labels = valid_labels
            best_preds = valid_preds

    # E. Evaluation & reporting
    save_plots(history['train_acc'], history['valid_acc'], history['train_loss'], history['valid_loss'], OUTPUT_DIR)

    if best_labels and best_preds:
        save_confusion_matrix(best_labels, best_preds, classes, f"{OUTPUT_DIR}/confusion_matrix.png")

    print("\n--- Selesai ---")
    print(f"Model terbaik disimpan di {OUTPUT_DIR}/{MODEL_NAME}")
def train_one_epoch(model, dataloader, optimizer, criterion, device):
    """Run one optimisation pass over ``dataloader``.

    Args:
        model: Network to train; put into training mode here.
        dataloader: Sized iterable yielding ``(images, labels)`` batches.
        optimizer: Optimizer stepped once per batch.
        criterion: Loss callable invoked as ``criterion(outputs, labels)``.
        device: Device each batch is moved to before the forward pass.

    Returns:
        ``(epoch_loss, epoch_acc)`` as Python floats: the per-sample mean
        loss and the fraction of correct predictions. ``(0.0, 0.0)`` when
        the dataloader yields no samples.
    """
    model.train()
    running_loss = 0.0
    correct_predictions = 0
    total_samples = 0

    for images, labels in tqdm(dataloader, total=len(dataloader), desc="Training"):
        images, labels = images.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        batch_size = labels.size(0)
        # Weight the batch loss by its size so the epoch value is a per-sample
        # mean (assumes the criterion reduces to a batch mean - TODO confirm).
        running_loss += loss.item() * batch_size
        _, preds = torch.max(outputs, 1)
        # Accumulate a plain int instead of a device tensor.
        correct_predictions += (preds == labels).sum().item()
        total_samples += batch_size

    if total_samples == 0:
        # Empty loader: avoid dividing by zero.
        return 0.0, 0.0
    return running_loss / total_samples, correct_predictions / total_samples


def validate_one_epoch(model, dataloader, criterion, device):
    """Evaluate ``model`` on ``dataloader`` without gradient tracking.

    Prints a scikit-learn classification report covering class ids
    ``0 .. NUM_CLASSES - 1``.

    Args:
        model: Network to evaluate; put into eval mode here.
        dataloader: Sized iterable yielding ``(images, labels)`` batches.
        criterion: Loss callable invoked as ``criterion(outputs, labels)``.
        device: Device each batch is moved to before the forward pass.

    Returns:
        ``(epoch_loss, epoch_acc, all_labels, all_preds)``; the last two are
        flat lists of integer class ids in iteration order. For an empty
        loader the result is ``(0.0, 0.0, [], [])`` and no report is printed.
    """
    model.eval()
    running_loss = 0.0
    correct_predictions = 0
    total_samples = 0
    all_preds, all_labels = [], []

    with torch.no_grad():
        for images, labels in tqdm(dataloader, total=len(dataloader), desc="Validating"):
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            loss = criterion(outputs, labels)

            batch_size = labels.size(0)
            running_loss += loss.item() * batch_size
            _, preds = torch.max(outputs, 1)
            correct_predictions += (preds == labels).sum().item()
            total_samples += batch_size

            all_preds.extend(preds.cpu().tolist())
            all_labels.extend(labels.cpu().tolist())

    if total_samples == 0:
        return 0.0, 0.0, all_labels, all_preds

    epoch_loss = running_loss / total_samples
    epoch_acc = correct_predictions / total_samples

    # Pass labels= explicitly: otherwise the report infers the class set from
    # the data and raises ValueError when fewer than NUM_CLASSES classes occur
    # (the target_names length would no longer match).
    class_ids = list(range(NUM_CLASSES))
    print("\n--- Laporan Klasifikasi Validasi ---")
    print(classification_report(
        all_labels,
        all_preds,
        labels=class_ids,
        target_names=[str(i) for i in class_ids],
        zero_division=0,
    ))

    return epoch_loss, epoch_acc, all_labels, all_preds
if __name__ == '__main__':
    # Every stage lives under the __main__ guard and runs exactly once, in
    # order; importing this module never starts a training run.

    # A. Data preparation & WeightedRandomSampler
    # Only the validation loader, class list and raw training dataset are
    # used; the training loader is rebuilt below around a balanced sampler.
    _, valid_loader, classes, train_dataset = get_dataloaders(DATA_DIR, BATCH_SIZE, IMAGE_SIZE)

    print("\n--- Menyiapkan Balanced Sampler untuk Training ---")
    # Per-sample weight = inverse frequency of that sample's class.
    class_counts = np.bincount(train_dataset.targets)
    class_weights = 1. / torch.tensor(class_counts, dtype=torch.float)
    sample_weights = class_weights[train_dataset.targets]

    # Draw with replacement so rare-class samples can repeat within an epoch.
    sampler = WeightedRandomSampler(weights=sample_weights, num_samples=len(sample_weights), replacement=True)

    # Training DataLoader driven by the balanced sampler.
    train_loader = DataLoader(
        train_dataset,
        batch_size=BATCH_SIZE,
        sampler=sampler,
        num_workers=0,  # adjust to the available CPU capacity
    )
    print("Balanced Sampler berhasil dibuat. Setiap batch training sekarang akan seimbang.")

    # B. Architecture & pretraining
    model = RetinaClassifier(num_classes=len(classes)).to(DEVICE)

    # C. Focal loss (used instead of CrossEntropyLoss); built once here.
    criterion = FocalLoss(alpha=[1, 1, 1, 2], gamma=2).to(DEVICE)

    print("Menggunakan Focal Loss untuk fokus pada sampel yang sulit.")

    # D. Fine-tuning strategy
    # --- STAGE 1: freeze everything, train only the classifier head ---
    print("\n--- TAHAP 1: Melatih Classifier Head ---")
    for param in model.parameters():
        param.requires_grad = False

    # Classifier layer exposed by the backbone.
    classifier_module = model.backbone.get_classifier()
    for param in classifier_module.parameters():
        param.requires_grad = True

    optimizer_head = optim.AdamW(classifier_module.parameters(), lr=LEARNING_RATE_HEAD, weight_decay=WEIGHT_DECAY)

    for epoch in range(5):
        print(f"Epoch Head {epoch+1}/5")
        train_one_epoch(model, train_loader, optimizer_head, criterion, DEVICE)
        validate_one_epoch(model, valid_loader, criterion, DEVICE)

    # --- STAGE 2: unfreeze and fine-tune the whole model ---
    print("\n--- TAHAP 2: Fine-tuning Seluruh Model ---")
    for param in model.parameters():
        param.requires_grad = True

    optimizer_finetune = optim.AdamW(model.parameters(), lr=LEARNING_RATE_FINETUNE, weight_decay=WEIGHT_DECAY)
    scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer_finetune, T_max=EPOCHS, eta_min=1e-7)

    history = {'train_loss': [], 'train_acc': [], 'valid_loss': [], 'valid_acc': []}
    best_valid_acc = 0.0
    best_labels, best_preds = None, None

    for epoch in range(EPOCHS):
        print(f"Epoch {epoch+1}/{EPOCHS}")

        train_loss, train_acc = train_one_epoch(model, train_loader, optimizer_finetune, criterion, DEVICE)
        valid_loss, valid_acc, valid_labels, valid_preds = validate_one_epoch(model, valid_loader, criterion, DEVICE)

        # The cosine schedule advances once per epoch.
        scheduler.step()

        print(f" Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}")
        print(f" Valid Loss: {valid_loss:.4f}, Valid Acc: {valid_acc:.4f}")

        history['train_loss'].append(train_loss)
        history['train_acc'].append(train_acc)
        history['valid_loss'].append(valid_loss)
        history['valid_acc'].append(valid_acc)

        # Checkpoint only on a strict improvement and remember that epoch's
        # labels/predictions for the confusion matrix.
        if valid_acc > best_valid_acc:
            print(f"Validasi akurasi meningkat dari {best_valid_acc:.4f} ke {valid_acc:.4f}. Menyimpan model...")
            save_model(epoch, model, optimizer_finetune, criterion, f"{OUTPUT_DIR}/{MODEL_NAME}")
            best_valid_acc = valid_acc
            best_labels = valid_labels
            best_preds = valid_preds

    # E. Evaluation & reporting
    save_plots(history['train_acc'], history['valid_acc'], history['train_loss'], history['valid_loss'], OUTPUT_DIR)

    if best_labels and best_preds:
        save_confusion_matrix(best_labels, best_preds, classes, f"{OUTPUT_DIR}/confusion_matrix.png")

    print("\n--- Selesai ---")
    print(f"Model terbaik disimpan di {OUTPUT_DIR}/{MODEL_NAME}")
def train_one_epoch(model, dataloader, optimizer, criterion, device):
    """Run one optimisation pass over ``dataloader``.

    Args:
        model: Network to train; put into training mode here.
        dataloader: Sized iterable yielding ``(images, labels)`` batches.
        optimizer: Optimizer stepped once per batch.
        criterion: Loss callable invoked as ``criterion(outputs, labels)``.
        device: Device each batch is moved to before the forward pass.

    Returns:
        ``(epoch_loss, epoch_acc)`` as Python floats: the per-sample mean
        loss and the fraction of correct predictions. ``(0.0, 0.0)`` when
        the dataloader yields no samples.
    """
    model.train()
    running_loss = 0.0
    correct_predictions = 0
    total_samples = 0

    for images, labels in tqdm(dataloader, total=len(dataloader), desc="Training"):
        images, labels = images.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        batch_size = labels.size(0)
        # Weight the batch loss by its size so the epoch value is a per-sample
        # mean (assumes the criterion reduces to a batch mean - TODO confirm).
        running_loss += loss.item() * batch_size
        _, preds = torch.max(outputs, 1)
        # Accumulate a plain int instead of a device tensor.
        correct_predictions += (preds == labels).sum().item()
        total_samples += batch_size

    if total_samples == 0:
        # Empty loader: avoid dividing by zero.
        return 0.0, 0.0
    return running_loss / total_samples, correct_predictions / total_samples


def validate_one_epoch(model, dataloader, criterion, device):
    """Evaluate ``model`` on ``dataloader`` without gradient tracking.

    Prints a scikit-learn classification report covering class ids
    ``0 .. NUM_CLASSES - 1``.

    Args:
        model: Network to evaluate; put into eval mode here.
        dataloader: Sized iterable yielding ``(images, labels)`` batches.
        criterion: Loss callable invoked as ``criterion(outputs, labels)``.
        device: Device each batch is moved to before the forward pass.

    Returns:
        ``(epoch_loss, epoch_acc, all_labels, all_preds)``; the last two are
        flat lists of integer class ids in iteration order. For an empty
        loader the result is ``(0.0, 0.0, [], [])`` and no report is printed.
    """
    model.eval()
    running_loss = 0.0
    correct_predictions = 0
    total_samples = 0
    all_preds, all_labels = [], []

    with torch.no_grad():
        for images, labels in tqdm(dataloader, total=len(dataloader), desc="Validating"):
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            loss = criterion(outputs, labels)

            batch_size = labels.size(0)
            running_loss += loss.item() * batch_size
            _, preds = torch.max(outputs, 1)
            correct_predictions += (preds == labels).sum().item()
            total_samples += batch_size

            all_preds.extend(preds.cpu().tolist())
            all_labels.extend(labels.cpu().tolist())

    if total_samples == 0:
        return 0.0, 0.0, all_labels, all_preds

    epoch_loss = running_loss / total_samples
    epoch_acc = correct_predictions / total_samples

    # Pass labels= explicitly: otherwise the report infers the class set from
    # the data and raises ValueError when fewer than NUM_CLASSES classes occur
    # (the target_names length would no longer match).
    class_ids = list(range(NUM_CLASSES))
    print("\n--- Laporan Klasifikasi Validasi ---")
    print(classification_report(
        all_labels,
        all_preds,
        labels=class_ids,
        target_names=[str(i) for i in class_ids],
        zero_division=0,
    ))

    return epoch_loss, epoch_acc, all_labels, all_preds
if __name__ == '__main__':
    # Every stage lives under the __main__ guard and runs exactly once, in
    # order; importing this module never starts a training run.

    # A. Data preparation & WeightedRandomSampler
    # Only the validation loader, class list and raw training dataset are
    # used; the training loader is rebuilt below around a balanced sampler.
    _, valid_loader, classes, train_dataset = get_dataloaders(DATA_DIR, BATCH_SIZE, IMAGE_SIZE)

    print("\n--- Menyiapkan Balanced Sampler untuk Training ---")
    # Per-sample weight = inverse frequency of that sample's class.
    class_counts = np.bincount(train_dataset.targets)
    class_weights = 1. / torch.tensor(class_counts, dtype=torch.float)
    sample_weights = class_weights[train_dataset.targets]

    # Draw with replacement so rare-class samples can repeat within an epoch.
    sampler = WeightedRandomSampler(weights=sample_weights, num_samples=len(sample_weights), replacement=True)

    # Training DataLoader driven by the balanced sampler.
    train_loader = DataLoader(
        train_dataset,
        batch_size=BATCH_SIZE,
        sampler=sampler,
        num_workers=0,  # adjust to the available CPU capacity
    )
    print("Balanced Sampler berhasil dibuat. Setiap batch training sekarang akan seimbang.")

    # B. Architecture & pretraining
    model = RetinaClassifier(num_classes=len(classes)).to(DEVICE)

    # C. Focal loss (used instead of CrossEntropyLoss); built once here.
    criterion = FocalLoss(alpha=[1, 1, 1, 2], gamma=2).to(DEVICE)
    print("Menggunakan Focal Loss untuk fokus pada sampel yang sulit.")

    # D. Fine-tuning strategy
    # --- STAGE 1: freeze everything, train only the classifier head ---
    print("\n--- TAHAP 1: Melatih Classifier Head ---")
    for param in model.parameters():
        param.requires_grad = False

    # Classifier layer exposed by the backbone.
    classifier_module = model.backbone.get_classifier()
    for param in classifier_module.parameters():
        param.requires_grad = True

    optimizer_head = optim.AdamW(classifier_module.parameters(), lr=LEARNING_RATE_HEAD, weight_decay=WEIGHT_DECAY)

    for epoch in range(5):
        print(f"Epoch Head {epoch+1}/5")
        train_one_epoch(model, train_loader, optimizer_head, criterion, DEVICE)
        validate_one_epoch(model, valid_loader, criterion, DEVICE)

    # --- STAGE 2: unfreeze and fine-tune the whole model ---
    print("\n--- TAHAP 2: Fine-tuning Seluruh Model ---")
    for param in model.parameters():
        param.requires_grad = True

    optimizer_finetune = optim.AdamW(model.parameters(), lr=LEARNING_RATE_FINETUNE, weight_decay=WEIGHT_DECAY)
    scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer_finetune, T_max=EPOCHS, eta_min=1e-7)

    history = {'train_loss': [], 'train_acc': [], 'valid_loss': [], 'valid_acc': []}
    best_valid_acc = 0.0
    best_labels, best_preds = None, None

    for epoch in range(EPOCHS):
        print(f"Epoch {epoch+1}/{EPOCHS}")

        train_loss, train_acc = train_one_epoch(model, train_loader, optimizer_finetune, criterion, DEVICE)
        valid_loss, valid_acc, valid_labels, valid_preds = validate_one_epoch(model, valid_loader, criterion, DEVICE)

        # The cosine schedule advances once per epoch.
        scheduler.step()

        print(f" Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}")
        print(f" Valid Loss: {valid_loss:.4f}, Valid Acc: {valid_acc:.4f}")

        history['train_loss'].append(train_loss)
        history['train_acc'].append(train_acc)
        history['valid_loss'].append(valid_loss)
        history['valid_acc'].append(valid_acc)

        # Checkpoint only on a strict improvement and remember that epoch's
        # labels/predictions for the confusion matrix.
        if valid_acc > best_valid_acc:
            print(f"Validasi akurasi meningkat dari {best_valid_acc:.4f} ke {valid_acc:.4f}. Menyimpan model...")
            save_model(epoch, model, optimizer_finetune, criterion, f"{OUTPUT_DIR}/{MODEL_NAME}")
            best_valid_acc = valid_acc
            best_labels = valid_labels
            best_preds = valid_preds

    # E. Evaluation & reporting
    save_plots(history['train_acc'], history['valid_acc'], history['train_loss'], history['valid_loss'], OUTPUT_DIR)

    if best_labels and best_preds:
        save_confusion_matrix(best_labels, best_preds, classes, f"{OUTPUT_DIR}/confusion_matrix.png")

    print("\n--- Selesai ---")
    print(f"Model terbaik disimpan di {OUTPUT_DIR}/{MODEL_NAME}")
def train_one_epoch(model, dataloader, optimizer, criterion, device):
    """Run one optimisation pass over ``dataloader``.

    Args:
        model: Network to train; put into training mode here.
        dataloader: Sized iterable yielding ``(images, labels)`` batches.
        optimizer: Optimizer stepped once per batch.
        criterion: Loss callable invoked as ``criterion(outputs, labels)``.
        device: Device each batch is moved to before the forward pass.

    Returns:
        ``(epoch_loss, epoch_acc)`` as Python floats: the per-sample mean
        loss and the fraction of correct predictions. ``(0.0, 0.0)`` when
        the dataloader yields no samples.
    """
    model.train()
    running_loss = 0.0
    correct_predictions = 0
    total_samples = 0

    for images, labels in tqdm(dataloader, total=len(dataloader), desc="Training"):
        images, labels = images.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        batch_size = labels.size(0)
        # Weight the batch loss by its size so the epoch value is a per-sample
        # mean (assumes the criterion reduces to a batch mean - TODO confirm).
        running_loss += loss.item() * batch_size
        _, preds = torch.max(outputs, 1)
        # Accumulate a plain int instead of a device tensor.
        correct_predictions += (preds == labels).sum().item()
        total_samples += batch_size

    if total_samples == 0:
        # Empty loader: avoid dividing by zero.
        return 0.0, 0.0
    return running_loss / total_samples, correct_predictions / total_samples


def validate_one_epoch(model, dataloader, criterion, device):
    """Evaluate ``model`` on ``dataloader`` without gradient tracking.

    Prints a scikit-learn classification report covering class ids
    ``0 .. NUM_CLASSES - 1``.

    Args:
        model: Network to evaluate; put into eval mode here.
        dataloader: Sized iterable yielding ``(images, labels)`` batches.
        criterion: Loss callable invoked as ``criterion(outputs, labels)``.
        device: Device each batch is moved to before the forward pass.

    Returns:
        ``(epoch_loss, epoch_acc, all_labels, all_preds)``; the last two are
        flat lists of integer class ids in iteration order. For an empty
        loader the result is ``(0.0, 0.0, [], [])`` and no report is printed.
    """
    model.eval()
    running_loss = 0.0
    correct_predictions = 0
    total_samples = 0
    all_preds, all_labels = [], []

    with torch.no_grad():
        for images, labels in tqdm(dataloader, total=len(dataloader), desc="Validating"):
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            loss = criterion(outputs, labels)

            batch_size = labels.size(0)
            running_loss += loss.item() * batch_size
            _, preds = torch.max(outputs, 1)
            correct_predictions += (preds == labels).sum().item()
            total_samples += batch_size

            all_preds.extend(preds.cpu().tolist())
            all_labels.extend(labels.cpu().tolist())

    if total_samples == 0:
        return 0.0, 0.0, all_labels, all_preds

    epoch_loss = running_loss / total_samples
    epoch_acc = correct_predictions / total_samples

    # Pass labels= explicitly: otherwise the report infers the class set from
    # the data and raises ValueError when fewer than NUM_CLASSES classes occur
    # (the target_names length would no longer match).
    class_ids = list(range(NUM_CLASSES))
    print("\n--- Laporan Klasifikasi Validasi ---")
    print(classification_report(
        all_labels,
        all_preds,
        labels=class_ids,
        target_names=[str(i) for i in class_ids],
        zero_division=0,
    ))

    return epoch_loss, epoch_acc, all_labels, all_preds
if __name__ == '__main__':
    # Every stage lives under the __main__ guard and runs exactly once, in
    # order; importing this module never starts a training run.

    # A. Data preparation & WeightedRandomSampler
    # Only the validation loader, class list and raw training dataset are
    # used; the training loader is rebuilt below around a balanced sampler.
    _, valid_loader, classes, train_dataset = get_dataloaders(DATA_DIR, BATCH_SIZE, IMAGE_SIZE)

    print("\n--- Menyiapkan Balanced Sampler untuk Training ---")
    # Per-sample weight = inverse frequency of that sample's class.
    class_counts = np.bincount(train_dataset.targets)
    class_weights = 1. / torch.tensor(class_counts, dtype=torch.float)
    sample_weights = class_weights[train_dataset.targets]

    # Draw with replacement so rare-class samples can repeat within an epoch.
    sampler = WeightedRandomSampler(weights=sample_weights, num_samples=len(sample_weights), replacement=True)

    # Training DataLoader driven by the balanced sampler.
    train_loader = DataLoader(
        train_dataset,
        batch_size=BATCH_SIZE,
        sampler=sampler,
        num_workers=0,  # adjust to the available CPU capacity
    )
    print("Balanced Sampler berhasil dibuat. Setiap batch training sekarang akan seimbang.")

    # B. Architecture & pretraining
    model = RetinaClassifier(num_classes=len(classes)).to(DEVICE)

    # C. Focal loss (used instead of CrossEntropyLoss); built once here.
    criterion = FocalLoss(alpha=[1, 1, 1, 2], gamma=2).to(DEVICE)
    print("Menggunakan Focal Loss untuk fokus pada sampel yang sulit.")

    # D. Fine-tuning strategy
    # --- STAGE 1: freeze the backbone, train only the classifier head ---
    print("\n--- TAHAP 1: Melatih Classifier Head ---")
    for param in model.backbone.parameters():
        param.requires_grad = False

    # Classifier layer exposed by the backbone; re-enabled after the freeze.
    classifier_module = model.backbone.get_classifier()
    for param in classifier_module.parameters():
        param.requires_grad = True

    optimizer_head = optim.AdamW(classifier_module.parameters(), lr=LEARNING_RATE_HEAD, weight_decay=WEIGHT_DECAY)

    for epoch in range(5):
        print(f"Epoch Head {epoch+1}/5")
        train_one_epoch(model, train_loader, optimizer_head, criterion, DEVICE)
        validate_one_epoch(model, valid_loader, criterion, DEVICE)

    # --- STAGE 2: unfreeze and fine-tune the whole model ---
    print("\n--- TAHAP 2: Fine-tuning Seluruh Model ---")
    for param in model.parameters():
        param.requires_grad = True

    optimizer_finetune = optim.AdamW(model.parameters(), lr=LEARNING_RATE_FINETUNE, weight_decay=WEIGHT_DECAY)
    scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer_finetune, T_max=EPOCHS, eta_min=1e-7)

    history = {'train_loss': [], 'train_acc': [], 'valid_loss': [], 'valid_acc': []}
    best_valid_acc = 0.0
    best_labels, best_preds = None, None

    for epoch in range(EPOCHS):
        print(f"Epoch {epoch+1}/{EPOCHS}")

        train_loss, train_acc = train_one_epoch(model, train_loader, optimizer_finetune, criterion, DEVICE)
        valid_loss, valid_acc, valid_labels, valid_preds = validate_one_epoch(model, valid_loader, criterion, DEVICE)

        # The cosine schedule advances once per epoch.
        scheduler.step()

        print(f" Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}")
        print(f" Valid Loss: {valid_loss:.4f}, Valid Acc: {valid_acc:.4f}")

        history['train_loss'].append(train_loss)
        history['train_acc'].append(train_acc)
        history['valid_loss'].append(valid_loss)
        history['valid_acc'].append(valid_acc)

        # Checkpoint only on a strict improvement and remember that epoch's
        # labels/predictions for the confusion matrix.
        if valid_acc > best_valid_acc:
            print(f"Validasi akurasi meningkat dari {best_valid_acc:.4f} ke {valid_acc:.4f}. Menyimpan model...")
            save_model(epoch, model, optimizer_finetune, criterion, f"{OUTPUT_DIR}/{MODEL_NAME}")
            best_valid_acc = valid_acc
            best_labels = valid_labels
            best_preds = valid_preds

    # E. Evaluation & reporting
    save_plots(history['train_acc'], history['valid_acc'], history['train_loss'], history['valid_loss'], OUTPUT_DIR)

    if best_labels and best_preds:
        save_confusion_matrix(best_labels, best_preds, classes, f"{OUTPUT_DIR}/confusion_matrix.png")

    print("\n--- Selesai ---")
    print(f"Model terbaik disimpan di {OUTPUT_DIR}/{MODEL_NAME}")
FUNGSI TRAINING & VALIDASI (Tidak ada perubahan di sini) --- +def train_one_epoch(model, dataloader, optimizer, criterion, device): + model.train() + running_loss = 0.0 + correct_predictions = 0 + total_samples = 0 + + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Training"): + images, labels = images.to(device), labels.to(device) + optimizer.zero_grad() + outputs = model(images) + loss = criterion(outputs, labels) + loss.backward() + optimizer.step() + + running_loss += loss.item() * images.size(0) + _, preds = torch.max(outputs, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + return epoch_loss, epoch_acc + +def validate_one_epoch(model, dataloader, criterion, device): + model.eval() + running_loss = 0.0 + correct_predictions = 0 + total_samples = 0 + all_preds, all_labels = [], [] + + with torch.no_grad(): + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Validating"): + images, labels = images.to(device), labels.to(device) + outputs = model(images) + loss = criterion(outputs, labels) + + running_loss += loss.item() * images.size(0) + _, preds = torch.max(outputs, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + all_preds.extend(preds.cpu().numpy()) + all_labels.extend(labels.cpu().numpy()) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + + print("\n--- Laporan Klasifikasi Validasi ---") + print(classification_report(all_labels, all_preds, target_names=[str(i) for i in range(NUM_CLASSES)], zero_division=0)) + + return epoch_loss, epoch_acc, all_labels, all_preds + +# --- 3. SCRIPT UTAMA --- +if __name__ == '__main__': + # A. 
Persiapan Data & Implementasi WeightedRandomSampler + # Panggil get_dataloaders untuk mendapatkan dataset latih + _, valid_loader, classes, train_dataset = get_dataloaders(DATA_DIR, BATCH_SIZE, IMAGE_SIZE) + + print("\n--- Menyiapkan Balanced Sampler untuk Training ---") + # Hitung bobot untuk setiap sampel (bukan per kelas) + class_counts = np.bincount(train_dataset.targets) + class_weights = 1. / torch.tensor(class_counts, dtype=torch.float) + sample_weights = class_weights[train_dataset.targets] + criterion = FocalLoss().to(DEVICE) + # Buat Sampler yang akan menyeimbangkan pengambilan data + sampler = WeightedRandomSampler(weights=sample_weights, num_samples=len(sample_weights), replacement=True) + + # Buat DataLoader training BARU yang menggunakan sampler + train_loader = DataLoader( + train_dataset, + batch_size=BATCH_SIZE, + sampler=sampler, + num_workers=0 # Sesuaikan dengan kemampuan CPU Anda + ) + print("Balanced Sampler berhasil dibuat. Setiap batch training sekarang akan seimbang.") + + # B. Arsitektur & Pretraining + model = RetinaClassifier(num_classes=len(classes)).to(DEVICE) + + # C. Menggunakan Focal Loss + # Ganti dari CrossEntropyLoss menjadi FocalLoss + criterion = FocalLoss(alpha=[1, 1, 1, 2], gamma=2).to(DEVICE) + print("Menggunakan Focal Loss untuk fokus pada sampel yang sulit.") + + # D. 
Strategi Fine-tune +# --- TAHAP 1: FREEZE BACKBONE, LATIH HEAD --- +print("\n--- TAHAP 1: Melatih Classifier Head ---") +for param in model.backbone.parameters(): + param.requires_grad = False + +# Ambil classifier layer dari backbone timm + classifier_module = model.backbone.get_classifier() +for param in classifier_module.parameters(): + param.requires_grad = True + + optimizer_head = optim.AdamW(classifier_module.parameters(), lr=LEARNING_RATE_HEAD, weight_decay=WEIGHT_DECAY) + +for epoch in range(5): + print(f"Epoch Head {epoch+1}/5") + train_one_epoch(model, train_loader, optimizer_head, criterion, DEVICE) + validate_one_epoch(model, valid_loader, criterion, DEVICE) + + + # --- TAHAP 2: UNFREEZE & FINE-TUNE SELURUH MODEL --- + print("\n--- TAHAP 2: Fine-tuning Seluruh Model ---") + for param in model.parameters(): + param.requires_grad = True + + optimizer_finetune = optim.AdamW(model.parameters(), lr=LEARNING_RATE_FINETUNE, weight_decay=WEIGHT_DECAY) + scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer_finetune, T_max=EPOCHS, eta_min=1e-7) + + history = {'train_loss': [], 'train_acc': [], 'valid_loss': [], 'valid_acc': []} + best_valid_acc = 0.0 + best_labels, best_preds = None, None + + for epoch in range(EPOCHS): + print(f"Epoch {epoch+1}/{EPOCHS}") + + train_loss, train_acc = train_one_epoch(model, train_loader, optimizer_finetune, criterion, DEVICE) + valid_loss, valid_acc, valid_labels, valid_preds = validate_one_epoch(model, valid_loader, criterion, DEVICE) + + scheduler.step() + + print(f" Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}") + print(f" Valid Loss: {valid_loss:.4f}, Valid Acc: {valid_acc:.4f}") + + history['train_loss'].append(train_loss) + history['train_acc'].append(train_acc) + history['valid_loss'].append(valid_loss) + history['valid_acc'].append(valid_acc) + + if valid_acc > best_valid_acc: + print(f"Validasi akurasi meningkat dari {best_valid_acc:.4f} ke {valid_acc:.4f}. 
Menyimpan model...") + save_model(epoch, model, optimizer_finetune, criterion, f"{OUTPUT_DIR}/{MODEL_NAME}") + best_valid_acc = valid_acc + best_labels = valid_labels + best_preds = valid_preds + + # E. Evaluasi & Reporting + save_plots(history['train_acc'], history['valid_acc'], history['train_loss'], history['valid_loss'], OUTPUT_DIR) + + if best_labels and best_preds: + save_confusion_matrix(best_labels, best_preds, classes, f"{OUTPUT_DIR}/confusion_matrix.png") + + print("\n--- Selesai ---") + print(f"Model terbaik disimpan di {OUTPUT_DIR}/{MODEL_NAME}") \ No newline at end of file diff --git a/.history/src/train_20250927163954.py b/.history/src/train_20250927163954.py new file mode 100644 index 0000000000000000000000000000000000000000..1df12fd7cfc19fd22d3f196c0ab3eef9fcbec88f --- /dev/null +++ b/.history/src/train_20250927163954.py @@ -0,0 +1,177 @@ +# src/train.py (Versi Final dengan Balanced Sampler & Focal Loss) + +import torch +import torch.nn as nn +import torch.optim as optim +from tqdm import tqdm +import numpy as np +from sklearn.metrics import classification_report +from torch.utils.data import DataLoader, WeightedRandomSampler +from utils import save_model, save_plots, save_confusion_matrix, FocalLoss + +# Impor dari file lain dalam proyek +from model import RetinaClassifier +# from model import create_model +from dataset import get_dataloaders +from utils import save_model, save_plots, save_confusion_matrix, FocalLoss # Pastikan FocalLoss diimpor + +# --- 1. KONFIGURASI & HYPERPARAMETERS --- +DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu' +DATA_DIR = 'data' +OUTPUT_DIR = 'outputs/models' +IMAGE_SIZE = 224 +BATCH_SIZE = 8 +EPOCHS = 30 +LEARNING_RATE_HEAD = 1e-3 +LEARNING_RATE_FINETUNE = 3e-5 +WEIGHT_DECAY = 0.01 +MODEL_NAME = 'best_vit_model_balanced.pth' +NUM_CLASSES = 4 + +# --- 2. 
FUNGSI TRAINING & VALIDASI (Tidak ada perubahan di sini) --- +def train_one_epoch(model, dataloader, optimizer, criterion, device): + model.train() + running_loss = 0.0 + correct_predictions = 0 + total_samples = 0 + + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Training"): + images, labels = images.to(device), labels.to(device) + optimizer.zero_grad() + outputs = model(images) + loss = criterion(outputs, labels) + loss.backward() + optimizer.step() + + running_loss += loss.item() * images.size(0) + _, preds = torch.max(outputs, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + return epoch_loss, epoch_acc + +def validate_one_epoch(model, dataloader, criterion, device): + model.eval() + running_loss = 0.0 + correct_predictions = 0 + total_samples = 0 + all_preds, all_labels = [], [] + + with torch.no_grad(): + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Validating"): + images, labels = images.to(device), labels.to(device) + outputs = model(images) + loss = criterion(outputs, labels) + + running_loss += loss.item() * images.size(0) + _, preds = torch.max(outputs, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + all_preds.extend(preds.cpu().numpy()) + all_labels.extend(labels.cpu().numpy()) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + + print("\n--- Laporan Klasifikasi Validasi ---") + print(classification_report(all_labels, all_preds, target_names=[str(i) for i in range(NUM_CLASSES)], zero_division=0)) + + return epoch_loss, epoch_acc, all_labels, all_preds + +# --- 3. SCRIPT UTAMA --- +if __name__ == '__main__': + # A. 
Persiapan Data & Implementasi WeightedRandomSampler + # Panggil get_dataloaders untuk mendapatkan dataset latih + _, valid_loader, classes, train_dataset = get_dataloaders(DATA_DIR, BATCH_SIZE, IMAGE_SIZE) + + print("\n--- Menyiapkan Balanced Sampler untuk Training ---") + # Hitung bobot untuk setiap sampel (bukan per kelas) + class_counts = np.bincount(train_dataset.targets) + class_weights = 1. / torch.tensor(class_counts, dtype=torch.float) + sample_weights = class_weights[train_dataset.targets] + criterion = FocalLoss().to(DEVICE) + # Buat Sampler yang akan menyeimbangkan pengambilan data + sampler = WeightedRandomSampler(weights=sample_weights, num_samples=len(sample_weights), replacement=True) + + # Buat DataLoader training BARU yang menggunakan sampler + train_loader = DataLoader( + train_dataset, + batch_size=BATCH_SIZE, + sampler=sampler, + num_workers=0 # Sesuaikan dengan kemampuan CPU Anda + ) + print("Balanced Sampler berhasil dibuat. Setiap batch training sekarang akan seimbang.") + + # B. Arsitektur & Pretraining + model = RetinaClassifier(num_classes=len(classes)).to(DEVICE) + +# C. Menggunakan Focal Loss +criterion = FocalLoss(alpha=[1, 1, 1, 2], gamma=2).to(DEVICE) +print("Menggunakan Focal Loss untuk fokus pada sampel yang sulit.") + + + # D. 
Strategi Fine-tune +# --- TAHAP 1: FREEZE BACKBONE, LATIH HEAD --- +print("\n--- TAHAP 1: Melatih Classifier Head ---") +for param in model.backbone.parameters(): + param.requires_grad = False + +# Ambil classifier layer dari backbone timm + classifier_module = model.backbone.get_classifier() +for param in classifier_module.parameters(): + param.requires_grad = True + + optimizer_head = optim.AdamW(classifier_module.parameters(), lr=LEARNING_RATE_HEAD, weight_decay=WEIGHT_DECAY) + +for epoch in range(5): + print(f"Epoch Head {epoch+1}/5") + train_one_epoch(model, train_loader, optimizer_head, criterion, DEVICE) + validate_one_epoch(model, valid_loader, criterion, DEVICE) + + + # --- TAHAP 2: UNFREEZE & FINE-TUNE SELURUH MODEL --- + print("\n--- TAHAP 2: Fine-tuning Seluruh Model ---") + for param in model.parameters(): + param.requires_grad = True + + optimizer_finetune = optim.AdamW(model.parameters(), lr=LEARNING_RATE_FINETUNE, weight_decay=WEIGHT_DECAY) + scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer_finetune, T_max=EPOCHS, eta_min=1e-7) + + history = {'train_loss': [], 'train_acc': [], 'valid_loss': [], 'valid_acc': []} + best_valid_acc = 0.0 + best_labels, best_preds = None, None + + for epoch in range(EPOCHS): + print(f"Epoch {epoch+1}/{EPOCHS}") + + train_loss, train_acc = train_one_epoch(model, train_loader, optimizer_finetune, criterion, DEVICE) + valid_loss, valid_acc, valid_labels, valid_preds = validate_one_epoch(model, valid_loader, criterion, DEVICE) + + scheduler.step() + + print(f" Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}") + print(f" Valid Loss: {valid_loss:.4f}, Valid Acc: {valid_acc:.4f}") + + history['train_loss'].append(train_loss) + history['train_acc'].append(train_acc) + history['valid_loss'].append(valid_loss) + history['valid_acc'].append(valid_acc) + + if valid_acc > best_valid_acc: + print(f"Validasi akurasi meningkat dari {best_valid_acc:.4f} ke {valid_acc:.4f}. 
Menyimpan model...") + save_model(epoch, model, optimizer_finetune, criterion, f"{OUTPUT_DIR}/{MODEL_NAME}") + best_valid_acc = valid_acc + best_labels = valid_labels + best_preds = valid_preds + + # E. Evaluasi & Reporting + save_plots(history['train_acc'], history['valid_acc'], history['train_loss'], history['valid_loss'], OUTPUT_DIR) + + if best_labels and best_preds: + save_confusion_matrix(best_labels, best_preds, classes, f"{OUTPUT_DIR}/confusion_matrix.png") + + print("\n--- Selesai ---") + print(f"Model terbaik disimpan di {OUTPUT_DIR}/{MODEL_NAME}") \ No newline at end of file diff --git a/.history/src/train_20250927163957.py b/.history/src/train_20250927163957.py new file mode 100644 index 0000000000000000000000000000000000000000..b53269de70a62ed4997499758d33633650db169a --- /dev/null +++ b/.history/src/train_20250927163957.py @@ -0,0 +1,177 @@ +# src/train.py (Versi Final dengan Balanced Sampler & Focal Loss) + +import torch +import torch.nn as nn +import torch.optim as optim +from tqdm import tqdm +import numpy as np +from sklearn.metrics import classification_report +from torch.utils.data import DataLoader, WeightedRandomSampler +from utils import save_model, save_plots, save_confusion_matrix, FocalLoss + +# Impor dari file lain dalam proyek +from model import RetinaClassifier +# from model import create_model +from dataset import get_dataloaders +from utils import save_model, save_plots, save_confusion_matrix, FocalLoss # Pastikan FocalLoss diimpor + +# --- 1. KONFIGURASI & HYPERPARAMETERS --- +DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu' +DATA_DIR = 'data' +OUTPUT_DIR = 'outputs/models' +IMAGE_SIZE = 224 +BATCH_SIZE = 8 +EPOCHS = 30 +LEARNING_RATE_HEAD = 1e-3 +LEARNING_RATE_FINETUNE = 3e-5 +WEIGHT_DECAY = 0.01 +MODEL_NAME = 'best_vit_model_balanced.pth' +NUM_CLASSES = 4 + +# --- 2. 
FUNGSI TRAINING & VALIDASI (Tidak ada perubahan di sini) --- +def train_one_epoch(model, dataloader, optimizer, criterion, device): + model.train() + running_loss = 0.0 + correct_predictions = 0 + total_samples = 0 + + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Training"): + images, labels = images.to(device), labels.to(device) + optimizer.zero_grad() + outputs = model(images) + loss = criterion(outputs, labels) + loss.backward() + optimizer.step() + + running_loss += loss.item() * images.size(0) + _, preds = torch.max(outputs, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + return epoch_loss, epoch_acc + +def validate_one_epoch(model, dataloader, criterion, device): + model.eval() + running_loss = 0.0 + correct_predictions = 0 + total_samples = 0 + all_preds, all_labels = [], [] + + with torch.no_grad(): + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Validating"): + images, labels = images.to(device), labels.to(device) + outputs = model(images) + loss = criterion(outputs, labels) + + running_loss += loss.item() * images.size(0) + _, preds = torch.max(outputs, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + all_preds.extend(preds.cpu().numpy()) + all_labels.extend(labels.cpu().numpy()) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + + print("\n--- Laporan Klasifikasi Validasi ---") + print(classification_report(all_labels, all_preds, target_names=[str(i) for i in range(NUM_CLASSES)], zero_division=0)) + + return epoch_loss, epoch_acc, all_labels, all_preds + +# --- 3. SCRIPT UTAMA --- +if __name__ == '__main__': + # A. 
Persiapan Data & Implementasi WeightedRandomSampler + # Panggil get_dataloaders untuk mendapatkan dataset latih + _, valid_loader, classes, train_dataset = get_dataloaders(DATA_DIR, BATCH_SIZE, IMAGE_SIZE) + + print("\n--- Menyiapkan Balanced Sampler untuk Training ---") + # Hitung bobot untuk setiap sampel (bukan per kelas) + class_counts = np.bincount(train_dataset.targets) + class_weights = 1. / torch.tensor(class_counts, dtype=torch.float) + sample_weights = class_weights[train_dataset.targets] + criterion = FocalLoss().to(DEVICE) + # Buat Sampler yang akan menyeimbangkan pengambilan data + sampler = WeightedRandomSampler(weights=sample_weights, num_samples=len(sample_weights), replacement=True) + + # Buat DataLoader training BARU yang menggunakan sampler + train_loader = DataLoader( + train_dataset, + batch_size=BATCH_SIZE, + sampler=sampler, + num_workers=0 # Sesuaikan dengan kemampuan CPU Anda + ) + print("Balanced Sampler berhasil dibuat. Setiap batch training sekarang akan seimbang.") + + # B. Arsitektur & Pretraining + model = RetinaClassifier(num_classes=len(classes)).to(DEVICE) + +# C. Menggunakan Focal Loss + criterion = FocalLoss(alpha=[1, 1, 1, 2], gamma=2).to(DEVICE) + print("Menggunakan Focal Loss untuk fokus pada sampel yang sulit.") + + + # D. 
Strategi Fine-tune +# --- TAHAP 1: FREEZE BACKBONE, LATIH HEAD --- +print("\n--- TAHAP 1: Melatih Classifier Head ---") +for param in model.backbone.parameters(): + param.requires_grad = False + +# Ambil classifier layer dari backbone timm + classifier_module = model.backbone.get_classifier() +for param in classifier_module.parameters(): + param.requires_grad = True + + optimizer_head = optim.AdamW(classifier_module.parameters(), lr=LEARNING_RATE_HEAD, weight_decay=WEIGHT_DECAY) + +for epoch in range(5): + print(f"Epoch Head {epoch+1}/5") + train_one_epoch(model, train_loader, optimizer_head, criterion, DEVICE) + validate_one_epoch(model, valid_loader, criterion, DEVICE) + + + # --- TAHAP 2: UNFREEZE & FINE-TUNE SELURUH MODEL --- + print("\n--- TAHAP 2: Fine-tuning Seluruh Model ---") + for param in model.parameters(): + param.requires_grad = True + + optimizer_finetune = optim.AdamW(model.parameters(), lr=LEARNING_RATE_FINETUNE, weight_decay=WEIGHT_DECAY) + scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer_finetune, T_max=EPOCHS, eta_min=1e-7) + + history = {'train_loss': [], 'train_acc': [], 'valid_loss': [], 'valid_acc': []} + best_valid_acc = 0.0 + best_labels, best_preds = None, None + + for epoch in range(EPOCHS): + print(f"Epoch {epoch+1}/{EPOCHS}") + + train_loss, train_acc = train_one_epoch(model, train_loader, optimizer_finetune, criterion, DEVICE) + valid_loss, valid_acc, valid_labels, valid_preds = validate_one_epoch(model, valid_loader, criterion, DEVICE) + + scheduler.step() + + print(f" Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}") + print(f" Valid Loss: {valid_loss:.4f}, Valid Acc: {valid_acc:.4f}") + + history['train_loss'].append(train_loss) + history['train_acc'].append(train_acc) + history['valid_loss'].append(valid_loss) + history['valid_acc'].append(valid_acc) + + if valid_acc > best_valid_acc: + print(f"Validasi akurasi meningkat dari {best_valid_acc:.4f} ke {valid_acc:.4f}. 
Menyimpan model...") + save_model(epoch, model, optimizer_finetune, criterion, f"{OUTPUT_DIR}/{MODEL_NAME}") + best_valid_acc = valid_acc + best_labels = valid_labels + best_preds = valid_preds + + # E. Evaluasi & Reporting + save_plots(history['train_acc'], history['valid_acc'], history['train_loss'], history['valid_loss'], OUTPUT_DIR) + + if best_labels and best_preds: + save_confusion_matrix(best_labels, best_preds, classes, f"{OUTPUT_DIR}/confusion_matrix.png") + + print("\n--- Selesai ---") + print(f"Model terbaik disimpan di {OUTPUT_DIR}/{MODEL_NAME}") \ No newline at end of file diff --git a/.history/src/train_20250927164001.py b/.history/src/train_20250927164001.py new file mode 100644 index 0000000000000000000000000000000000000000..965b60ec3e0dda0a2e235ef57ca5b3014d122e0b --- /dev/null +++ b/.history/src/train_20250927164001.py @@ -0,0 +1,176 @@ +# src/train.py (Versi Final dengan Balanced Sampler & Focal Loss) + +import torch +import torch.nn as nn +import torch.optim as optim +from tqdm import tqdm +import numpy as np +from sklearn.metrics import classification_report +from torch.utils.data import DataLoader, WeightedRandomSampler +from utils import save_model, save_plots, save_confusion_matrix, FocalLoss + +# Impor dari file lain dalam proyek +from model import RetinaClassifier +# from model import create_model +from dataset import get_dataloaders +from utils import save_model, save_plots, save_confusion_matrix, FocalLoss # Pastikan FocalLoss diimpor + +# --- 1. KONFIGURASI & HYPERPARAMETERS --- +DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu' +DATA_DIR = 'data' +OUTPUT_DIR = 'outputs/models' +IMAGE_SIZE = 224 +BATCH_SIZE = 8 +EPOCHS = 30 +LEARNING_RATE_HEAD = 1e-3 +LEARNING_RATE_FINETUNE = 3e-5 +WEIGHT_DECAY = 0.01 +MODEL_NAME = 'best_vit_model_balanced.pth' +NUM_CLASSES = 4 + +# --- 2. 
FUNGSI TRAINING & VALIDASI (Tidak ada perubahan di sini) --- +def train_one_epoch(model, dataloader, optimizer, criterion, device): + model.train() + running_loss = 0.0 + correct_predictions = 0 + total_samples = 0 + + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Training"): + images, labels = images.to(device), labels.to(device) + optimizer.zero_grad() + outputs = model(images) + loss = criterion(outputs, labels) + loss.backward() + optimizer.step() + + running_loss += loss.item() * images.size(0) + _, preds = torch.max(outputs, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + return epoch_loss, epoch_acc + +def validate_one_epoch(model, dataloader, criterion, device): + model.eval() + running_loss = 0.0 + correct_predictions = 0 + total_samples = 0 + all_preds, all_labels = [], [] + + with torch.no_grad(): + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Validating"): + images, labels = images.to(device), labels.to(device) + outputs = model(images) + loss = criterion(outputs, labels) + + running_loss += loss.item() * images.size(0) + _, preds = torch.max(outputs, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + all_preds.extend(preds.cpu().numpy()) + all_labels.extend(labels.cpu().numpy()) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + + print("\n--- Laporan Klasifikasi Validasi ---") + print(classification_report(all_labels, all_preds, target_names=[str(i) for i in range(NUM_CLASSES)], zero_division=0)) + + return epoch_loss, epoch_acc, all_labels, all_preds + +# --- 3. SCRIPT UTAMA --- +if __name__ == '__main__': + # A. 
Persiapan Data & Implementasi WeightedRandomSampler + # Panggil get_dataloaders untuk mendapatkan dataset latih + _, valid_loader, classes, train_dataset = get_dataloaders(DATA_DIR, BATCH_SIZE, IMAGE_SIZE) + + print("\n--- Menyiapkan Balanced Sampler untuk Training ---") + # Hitung bobot untuk setiap sampel (bukan per kelas) + class_counts = np.bincount(train_dataset.targets) + class_weights = 1. / torch.tensor(class_counts, dtype=torch.float) + sample_weights = class_weights[train_dataset.targets] + # Buat Sampler yang akan menyeimbangkan pengambilan data + sampler = WeightedRandomSampler(weights=sample_weights, num_samples=len(sample_weights), replacement=True) + + # Buat DataLoader training BARU yang menggunakan sampler + train_loader = DataLoader( + train_dataset, + batch_size=BATCH_SIZE, + sampler=sampler, + num_workers=0 # Sesuaikan dengan kemampuan CPU Anda + ) + print("Balanced Sampler berhasil dibuat. Setiap batch training sekarang akan seimbang.") + + # B. Arsitektur & Pretraining + model = RetinaClassifier(num_classes=len(classes)).to(DEVICE) + +# C. Menggunakan Focal Loss + criterion = FocalLoss(alpha=[1, 1, 1, 2], gamma=2).to(DEVICE) + print("Menggunakan Focal Loss untuk fokus pada sampel yang sulit.") + + + # D. 
Strategi Fine-tune +# --- TAHAP 1: FREEZE BACKBONE, LATIH HEAD --- +print("\n--- TAHAP 1: Melatih Classifier Head ---") +for param in model.backbone.parameters(): + param.requires_grad = False + +# Ambil classifier layer dari backbone timm + classifier_module = model.backbone.get_classifier() +for param in classifier_module.parameters(): + param.requires_grad = True + + optimizer_head = optim.AdamW(classifier_module.parameters(), lr=LEARNING_RATE_HEAD, weight_decay=WEIGHT_DECAY) + +for epoch in range(5): + print(f"Epoch Head {epoch+1}/5") + train_one_epoch(model, train_loader, optimizer_head, criterion, DEVICE) + validate_one_epoch(model, valid_loader, criterion, DEVICE) + + + # --- TAHAP 2: UNFREEZE & FINE-TUNE SELURUH MODEL --- + print("\n--- TAHAP 2: Fine-tuning Seluruh Model ---") + for param in model.parameters(): + param.requires_grad = True + + optimizer_finetune = optim.AdamW(model.parameters(), lr=LEARNING_RATE_FINETUNE, weight_decay=WEIGHT_DECAY) + scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer_finetune, T_max=EPOCHS, eta_min=1e-7) + + history = {'train_loss': [], 'train_acc': [], 'valid_loss': [], 'valid_acc': []} + best_valid_acc = 0.0 + best_labels, best_preds = None, None + + for epoch in range(EPOCHS): + print(f"Epoch {epoch+1}/{EPOCHS}") + + train_loss, train_acc = train_one_epoch(model, train_loader, optimizer_finetune, criterion, DEVICE) + valid_loss, valid_acc, valid_labels, valid_preds = validate_one_epoch(model, valid_loader, criterion, DEVICE) + + scheduler.step() + + print(f" Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}") + print(f" Valid Loss: {valid_loss:.4f}, Valid Acc: {valid_acc:.4f}") + + history['train_loss'].append(train_loss) + history['train_acc'].append(train_acc) + history['valid_loss'].append(valid_loss) + history['valid_acc'].append(valid_acc) + + if valid_acc > best_valid_acc: + print(f"Validasi akurasi meningkat dari {best_valid_acc:.4f} ke {valid_acc:.4f}. 
Menyimpan model...") + save_model(epoch, model, optimizer_finetune, criterion, f"{OUTPUT_DIR}/{MODEL_NAME}") + best_valid_acc = valid_acc + best_labels = valid_labels + best_preds = valid_preds + + # E. Evaluasi & Reporting + save_plots(history['train_acc'], history['valid_acc'], history['train_loss'], history['valid_loss'], OUTPUT_DIR) + + if best_labels and best_preds: + save_confusion_matrix(best_labels, best_preds, classes, f"{OUTPUT_DIR}/confusion_matrix.png") + + print("\n--- Selesai ---") + print(f"Model terbaik disimpan di {OUTPUT_DIR}/{MODEL_NAME}") \ No newline at end of file diff --git a/.history/src/train_20250927164003.py b/.history/src/train_20250927164003.py new file mode 100644 index 0000000000000000000000000000000000000000..8968c1481bb8f20a200fa7b38e3b73f766973e15 --- /dev/null +++ b/.history/src/train_20250927164003.py @@ -0,0 +1,177 @@ +# src/train.py (Versi Final dengan Balanced Sampler & Focal Loss) + +import torch +import torch.nn as nn +import torch.optim as optim +from tqdm import tqdm +import numpy as np +from sklearn.metrics import classification_report +from torch.utils.data import DataLoader, WeightedRandomSampler +from utils import save_model, save_plots, save_confusion_matrix, FocalLoss + +# Impor dari file lain dalam proyek +from model import RetinaClassifier +# from model import create_model +from dataset import get_dataloaders +from utils import save_model, save_plots, save_confusion_matrix, FocalLoss # Pastikan FocalLoss diimpor + +# --- 1. KONFIGURASI & HYPERPARAMETERS --- +DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu' +DATA_DIR = 'data' +OUTPUT_DIR = 'outputs/models' +IMAGE_SIZE = 224 +BATCH_SIZE = 8 +EPOCHS = 30 +LEARNING_RATE_HEAD = 1e-3 +LEARNING_RATE_FINETUNE = 3e-5 +WEIGHT_DECAY = 0.01 +MODEL_NAME = 'best_vit_model_balanced.pth' +NUM_CLASSES = 4 + +# --- 2. 
FUNGSI TRAINING & VALIDASI (Tidak ada perubahan di sini) --- +def train_one_epoch(model, dataloader, optimizer, criterion, device): + model.train() + running_loss = 0.0 + correct_predictions = 0 + total_samples = 0 + + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Training"): + images, labels = images.to(device), labels.to(device) + optimizer.zero_grad() + outputs = model(images) + loss = criterion(outputs, labels) + loss.backward() + optimizer.step() + + running_loss += loss.item() * images.size(0) + _, preds = torch.max(outputs, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + return epoch_loss, epoch_acc + +def validate_one_epoch(model, dataloader, criterion, device): + model.eval() + running_loss = 0.0 + correct_predictions = 0 + total_samples = 0 + all_preds, all_labels = [], [] + + with torch.no_grad(): + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Validating"): + images, labels = images.to(device), labels.to(device) + outputs = model(images) + loss = criterion(outputs, labels) + + running_loss += loss.item() * images.size(0) + _, preds = torch.max(outputs, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + all_preds.extend(preds.cpu().numpy()) + all_labels.extend(labels.cpu().numpy()) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + + print("\n--- Laporan Klasifikasi Validasi ---") + print(classification_report(all_labels, all_preds, target_names=[str(i) for i in range(NUM_CLASSES)], zero_division=0)) + + return epoch_loss, epoch_acc, all_labels, all_preds + +# --- 3. SCRIPT UTAMA --- +if __name__ == '__main__': + # A. 
Persiapan Data & Implementasi WeightedRandomSampler + # Panggil get_dataloaders untuk mendapatkan dataset latih + _, valid_loader, classes, train_dataset = get_dataloaders(DATA_DIR, BATCH_SIZE, IMAGE_SIZE) + + print("\n--- Menyiapkan Balanced Sampler untuk Training ---") + # Hitung bobot untuk setiap sampel (bukan per kelas) + class_counts = np.bincount(train_dataset.targets) + class_weights = 1. / torch.tensor(class_counts, dtype=torch.float) + sample_weights = class_weights[train_dataset.targets] + + # Buat Sampler yang akan menyeimbangkan pengambilan data + sampler = WeightedRandomSampler(weights=sample_weights, num_samples=len(sample_weights), replacement=True) + + # Buat DataLoader training BARU yang menggunakan sampler + train_loader = DataLoader( + train_dataset, + batch_size=BATCH_SIZE, + sampler=sampler, + num_workers=0 # Sesuaikan dengan kemampuan CPU Anda + ) + print("Balanced Sampler berhasil dibuat. Setiap batch training sekarang akan seimbang.") + + # B. Arsitektur & Pretraining + model = RetinaClassifier(num_classes=len(classes)).to(DEVICE) + +# C. Menggunakan Focal Loss + criterion = FocalLoss(alpha=[1, 1, 1, 2], gamma=2).to(DEVICE) + print("Menggunakan Focal Loss untuk fokus pada sampel yang sulit.") + + + # D. 
Strategi Fine-tune +# --- TAHAP 1: FREEZE BACKBONE, LATIH HEAD --- +print("\n--- TAHAP 1: Melatih Classifier Head ---") +for param in model.backbone.parameters(): + param.requires_grad = False + +# Ambil classifier layer dari backbone timm + classifier_module = model.backbone.get_classifier() +for param in classifier_module.parameters(): + param.requires_grad = True + + optimizer_head = optim.AdamW(classifier_module.parameters(), lr=LEARNING_RATE_HEAD, weight_decay=WEIGHT_DECAY) + +for epoch in range(5): + print(f"Epoch Head {epoch+1}/5") + train_one_epoch(model, train_loader, optimizer_head, criterion, DEVICE) + validate_one_epoch(model, valid_loader, criterion, DEVICE) + + + # --- TAHAP 2: UNFREEZE & FINE-TUNE SELURUH MODEL --- + print("\n--- TAHAP 2: Fine-tuning Seluruh Model ---") + for param in model.parameters(): + param.requires_grad = True + + optimizer_finetune = optim.AdamW(model.parameters(), lr=LEARNING_RATE_FINETUNE, weight_decay=WEIGHT_DECAY) + scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer_finetune, T_max=EPOCHS, eta_min=1e-7) + + history = {'train_loss': [], 'train_acc': [], 'valid_loss': [], 'valid_acc': []} + best_valid_acc = 0.0 + best_labels, best_preds = None, None + + for epoch in range(EPOCHS): + print(f"Epoch {epoch+1}/{EPOCHS}") + + train_loss, train_acc = train_one_epoch(model, train_loader, optimizer_finetune, criterion, DEVICE) + valid_loss, valid_acc, valid_labels, valid_preds = validate_one_epoch(model, valid_loader, criterion, DEVICE) + + scheduler.step() + + print(f" Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}") + print(f" Valid Loss: {valid_loss:.4f}, Valid Acc: {valid_acc:.4f}") + + history['train_loss'].append(train_loss) + history['train_acc'].append(train_acc) + history['valid_loss'].append(valid_loss) + history['valid_acc'].append(valid_acc) + + if valid_acc > best_valid_acc: + print(f"Validasi akurasi meningkat dari {best_valid_acc:.4f} ke {valid_acc:.4f}. 
Menyimpan model...") + save_model(epoch, model, optimizer_finetune, criterion, f"{OUTPUT_DIR}/{MODEL_NAME}") + best_valid_acc = valid_acc + best_labels = valid_labels + best_preds = valid_preds + + # E. Evaluasi & Reporting + save_plots(history['train_acc'], history['valid_acc'], history['train_loss'], history['valid_loss'], OUTPUT_DIR) + + if best_labels and best_preds: + save_confusion_matrix(best_labels, best_preds, classes, f"{OUTPUT_DIR}/confusion_matrix.png") + + print("\n--- Selesai ---") + print(f"Model terbaik disimpan di {OUTPUT_DIR}/{MODEL_NAME}") \ No newline at end of file diff --git a/.history/src/train_20250927164004.py b/.history/src/train_20250927164004.py new file mode 100644 index 0000000000000000000000000000000000000000..965b60ec3e0dda0a2e235ef57ca5b3014d122e0b --- /dev/null +++ b/.history/src/train_20250927164004.py @@ -0,0 +1,176 @@ +# src/train.py (Versi Final dengan Balanced Sampler & Focal Loss) + +import torch +import torch.nn as nn +import torch.optim as optim +from tqdm import tqdm +import numpy as np +from sklearn.metrics import classification_report +from torch.utils.data import DataLoader, WeightedRandomSampler +from utils import save_model, save_plots, save_confusion_matrix, FocalLoss + +# Impor dari file lain dalam proyek +from model import RetinaClassifier +# from model import create_model +from dataset import get_dataloaders +from utils import save_model, save_plots, save_confusion_matrix, FocalLoss # Pastikan FocalLoss diimpor + +# --- 1. KONFIGURASI & HYPERPARAMETERS --- +DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu' +DATA_DIR = 'data' +OUTPUT_DIR = 'outputs/models' +IMAGE_SIZE = 224 +BATCH_SIZE = 8 +EPOCHS = 30 +LEARNING_RATE_HEAD = 1e-3 +LEARNING_RATE_FINETUNE = 3e-5 +WEIGHT_DECAY = 0.01 +MODEL_NAME = 'best_vit_model_balanced.pth' +NUM_CLASSES = 4 + +# --- 2. 
FUNGSI TRAINING & VALIDASI (Tidak ada perubahan di sini) --- +def train_one_epoch(model, dataloader, optimizer, criterion, device): + model.train() + running_loss = 0.0 + correct_predictions = 0 + total_samples = 0 + + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Training"): + images, labels = images.to(device), labels.to(device) + optimizer.zero_grad() + outputs = model(images) + loss = criterion(outputs, labels) + loss.backward() + optimizer.step() + + running_loss += loss.item() * images.size(0) + _, preds = torch.max(outputs, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + return epoch_loss, epoch_acc + +def validate_one_epoch(model, dataloader, criterion, device): + model.eval() + running_loss = 0.0 + correct_predictions = 0 + total_samples = 0 + all_preds, all_labels = [], [] + + with torch.no_grad(): + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Validating"): + images, labels = images.to(device), labels.to(device) + outputs = model(images) + loss = criterion(outputs, labels) + + running_loss += loss.item() * images.size(0) + _, preds = torch.max(outputs, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + all_preds.extend(preds.cpu().numpy()) + all_labels.extend(labels.cpu().numpy()) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + + print("\n--- Laporan Klasifikasi Validasi ---") + print(classification_report(all_labels, all_preds, target_names=[str(i) for i in range(NUM_CLASSES)], zero_division=0)) + + return epoch_loss, epoch_acc, all_labels, all_preds + +# --- 3. SCRIPT UTAMA --- +if __name__ == '__main__': + # A. 
Persiapan Data & Implementasi WeightedRandomSampler + # Panggil get_dataloaders untuk mendapatkan dataset latih + _, valid_loader, classes, train_dataset = get_dataloaders(DATA_DIR, BATCH_SIZE, IMAGE_SIZE) + + print("\n--- Menyiapkan Balanced Sampler untuk Training ---") + # Hitung bobot untuk setiap sampel (bukan per kelas) + class_counts = np.bincount(train_dataset.targets) + class_weights = 1. / torch.tensor(class_counts, dtype=torch.float) + sample_weights = class_weights[train_dataset.targets] + # Buat Sampler yang akan menyeimbangkan pengambilan data + sampler = WeightedRandomSampler(weights=sample_weights, num_samples=len(sample_weights), replacement=True) + + # Buat DataLoader training BARU yang menggunakan sampler + train_loader = DataLoader( + train_dataset, + batch_size=BATCH_SIZE, + sampler=sampler, + num_workers=0 # Sesuaikan dengan kemampuan CPU Anda + ) + print("Balanced Sampler berhasil dibuat. Setiap batch training sekarang akan seimbang.") + + # B. Arsitektur & Pretraining + model = RetinaClassifier(num_classes=len(classes)).to(DEVICE) + +# C. Menggunakan Focal Loss + criterion = FocalLoss(alpha=[1, 1, 1, 2], gamma=2).to(DEVICE) + print("Menggunakan Focal Loss untuk fokus pada sampel yang sulit.") + + + # D. 
Strategi Fine-tune +# --- TAHAP 1: FREEZE BACKBONE, LATIH HEAD --- +print("\n--- TAHAP 1: Melatih Classifier Head ---") +for param in model.backbone.parameters(): + param.requires_grad = False + +# Ambil classifier layer dari backbone timm + classifier_module = model.backbone.get_classifier() +for param in classifier_module.parameters(): + param.requires_grad = True + + optimizer_head = optim.AdamW(classifier_module.parameters(), lr=LEARNING_RATE_HEAD, weight_decay=WEIGHT_DECAY) + +for epoch in range(5): + print(f"Epoch Head {epoch+1}/5") + train_one_epoch(model, train_loader, optimizer_head, criterion, DEVICE) + validate_one_epoch(model, valid_loader, criterion, DEVICE) + + + # --- TAHAP 2: UNFREEZE & FINE-TUNE SELURUH MODEL --- + print("\n--- TAHAP 2: Fine-tuning Seluruh Model ---") + for param in model.parameters(): + param.requires_grad = True + + optimizer_finetune = optim.AdamW(model.parameters(), lr=LEARNING_RATE_FINETUNE, weight_decay=WEIGHT_DECAY) + scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer_finetune, T_max=EPOCHS, eta_min=1e-7) + + history = {'train_loss': [], 'train_acc': [], 'valid_loss': [], 'valid_acc': []} + best_valid_acc = 0.0 + best_labels, best_preds = None, None + + for epoch in range(EPOCHS): + print(f"Epoch {epoch+1}/{EPOCHS}") + + train_loss, train_acc = train_one_epoch(model, train_loader, optimizer_finetune, criterion, DEVICE) + valid_loss, valid_acc, valid_labels, valid_preds = validate_one_epoch(model, valid_loader, criterion, DEVICE) + + scheduler.step() + + print(f" Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}") + print(f" Valid Loss: {valid_loss:.4f}, Valid Acc: {valid_acc:.4f}") + + history['train_loss'].append(train_loss) + history['train_acc'].append(train_acc) + history['valid_loss'].append(valid_loss) + history['valid_acc'].append(valid_acc) + + if valid_acc > best_valid_acc: + print(f"Validasi akurasi meningkat dari {best_valid_acc:.4f} ke {valid_acc:.4f}. 
Menyimpan model...") + save_model(epoch, model, optimizer_finetune, criterion, f"{OUTPUT_DIR}/{MODEL_NAME}") + best_valid_acc = valid_acc + best_labels = valid_labels + best_preds = valid_preds + + # E. Evaluasi & Reporting + save_plots(history['train_acc'], history['valid_acc'], history['train_loss'], history['valid_loss'], OUTPUT_DIR) + + if best_labels and best_preds: + save_confusion_matrix(best_labels, best_preds, classes, f"{OUTPUT_DIR}/confusion_matrix.png") + + print("\n--- Selesai ---") + print(f"Model terbaik disimpan di {OUTPUT_DIR}/{MODEL_NAME}") \ No newline at end of file diff --git a/.history/src/train_20250927164636.py b/.history/src/train_20250927164636.py new file mode 100644 index 0000000000000000000000000000000000000000..03f2b0686e661a34f5f0224cdb5c8cbab676d5a4 --- /dev/null +++ b/.history/src/train_20250927164636.py @@ -0,0 +1,161 @@ +# src/train.py + +import torch +import torch.nn as nn +import torch.optim as optim +from tqdm import tqdm +import numpy as np +from sklearn.metrics import classification_report +from torch.utils.data import DataLoader, WeightedRandomSampler + +from model import create_model +from dataset import get_dataloaders +from utils import save_model, save_plots, save_confusion_matrix, FocalLoss + +# --- 1. KONFIGURASI & HYPERPARAMETERS --- +DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu' +DATA_DIR = '../data' +OUTPUT_DIR = '../outputs' +IMAGE_SIZE = 224 +BATCH_SIZE = 8 +NUM_WORKERS = 4 +EPOCHS = 50 +LEARNING_RATE_HEAD = 1e-3 +LEARNING_RATE_FINETUNE = 3e-5 +WEIGHT_DECAY = 0.01 +MODEL_NAME = 'best_swin_model_final.pth' +NUM_CLASSES = 4 + +# --- 2. 
FUNGSI TRAINING & VALIDASI --- +def train_one_epoch(model, dataloader, optimizer, criterion, device): + model.train() + running_loss = 0.0 + correct_predictions = 0 + total_samples = 0 + + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Training"): + images, labels = images.to(device), labels.to(device) + optimizer.zero_grad() + outputs = model(images) + loss = criterion(outputs, labels) + loss.backward() + optimizer.step() + + running_loss += loss.item() * images.size(0) + _, preds = torch.max(outputs, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + return epoch_loss, epoch_acc + +def validate_one_epoch(model, dataloader, criterion, device): + model.eval() + running_loss = 0.0 + correct_predictions = 0 + total_samples = 0 + all_preds, all_labels = [], [] + + with torch.no_grad(): + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Validating"): + images, labels = images.to(device), labels.to(device) + outputs = model(images) + loss = criterion(outputs, labels) + + running_loss += loss.item() * images.size(0) + _, preds = torch.max(outputs, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + all_preds.extend(preds.cpu().numpy()) + all_labels.extend(labels.cpu().numpy()) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + + print("\n--- Laporan Klasifikasi Validasi ---") + print(classification_report(all_labels, all_preds, target_names=[str(i) for i in range(NUM_CLASSES)], zero_division=0)) + + return epoch_loss, epoch_acc, all_labels, all_preds + +# --- 3. 
SCRIPT UTAMA --- +if __name__ == '__main__': + _, valid_loader, classes, train_dataset = get_dataloaders(DATA_DIR, BATCH_SIZE, IMAGE_SIZE, NUM_WORKERS) + + print("\n--- Menyiapkan Balanced Sampler untuk Training ---") + class_counts = np.bincount(train_dataset.targets) + class_weights = 1. / torch.tensor(class_counts, dtype=torch.float) + sample_weights = class_weights[train_dataset.targets] + + sampler = WeightedRandomSampler(weights=sample_weights, num_samples=len(sample_weights), replacement=True) + + train_loader = DataLoader( + train_dataset, + batch_size=BATCH_SIZE, + sampler=sampler, + num_workers=NUM_WORKERS + ) + print("Balanced Sampler berhasil dibuat.") + + model = create_model(num_classes=NUM_CLASSES).to(DEVICE) + + criterion = FocalLoss().to(DEVICE) + print("Menggunakan Focal Loss.") + + # --- TAHAP 1: FREEZE BACKBONE, LATIH HEAD --- + print("\n--- TAHAP 1: Melatih Classifier Head ---") + for param in model.parameters(): + param.requires_grad = False + for param in model.head.parameters(): + param.requires_grad = True + + optimizer_head = optim.AdamW(model.head.parameters(), lr=LEARNING_RATE_HEAD, weight_decay=WEIGHT_DECAY) + + for epoch in range(5): + print(f"Epoch Head {epoch+1}/5") + train_one_epoch(model, train_loader, optimizer_head, criterion, DEVICE) + validate_one_epoch(model, valid_loader, criterion, DEVICE) + + # --- TAHAP 2: UNFREEZE & FINE-TUNE SELURUH MODEL --- + print("\n--- TAHAP 2: Fine-tuning Seluruh Model ---") + for param in model.parameters(): + param.requires_grad = True + + optimizer_finetune = optim.AdamW(model.parameters(), lr=LEARNING_RATE_FINETUNE, weight_decay=WEIGHT_DECAY) + scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer_finetune, T_max=EPOCHS, eta_min=1e-7) + + history = {'train_loss': [], 'train_acc': [], 'valid_loss': [], 'valid_acc': []} + best_valid_acc = 0.0 + best_labels, best_preds = None, None + + for epoch in range(EPOCHS): + print(f"Epoch {epoch+1}/{EPOCHS}") + + train_loss, train_acc = 
train_one_epoch(model, train_loader, optimizer_finetune, criterion, DEVICE) + valid_loss, valid_acc, valid_labels, valid_preds = validate_one_epoch(model, valid_loader, criterion, DEVICE) + + scheduler.step() + + print(f" Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}") + print(f" Valid Loss: {valid_loss:.4f}, Valid Acc: {valid_acc:.4f}") + + history['train_loss'].append(train_loss) + history['train_acc'].append(train_acc) + history['valid_loss'].append(valid_loss) + history['valid_acc'].append(valid_acc) + + if valid_acc > best_valid_acc: + print(f"Validasi akurasi meningkat dari {best_valid_acc:.4f} ke {valid_acc:.4f}. Menyimpan model...") + save_model(epoch, model, optimizer_finetune, criterion, f"{OUTPUT_DIR}/{MODEL_NAME}") + best_valid_acc = valid_acc + best_labels = valid_labels + best_preds = valid_preds + + save_plots(history['train_acc'], history['valid_acc'], history['train_loss'], history['valid_loss'], OUTPUT_DIR) + + if best_labels and best_preds: + save_confusion_matrix(best_labels, best_preds, classes, f"{OUTPUT_DIR}/confusion_matrix.png") + + print("\n--- Selesai ---") + print(f"Model terbaik disimpan di {OUTPUT_DIR}/{MODEL_NAME}") \ No newline at end of file diff --git a/.history/src/train_20250927173232.py b/.history/src/train_20250927173232.py new file mode 100644 index 0000000000000000000000000000000000000000..d94602500b01905adca55230da238cb404fb5d3f --- /dev/null +++ b/.history/src/train_20250927173232.py @@ -0,0 +1,161 @@ +# src/train.py + +import torch +import torch.nn as nn +import torch.optim as optim +from tqdm import tqdm +import numpy as np +from sklearn.metrics import classification_report +from torch.utils.data import DataLoader, WeightedRandomSampler + +from model import create_model +from dataset import get_dataloaders +from utils import save_model, save_plots, save_confusion_matrix, FocalLoss + +# --- 1. 
KONFIGURASI & HYPERPARAMETERS --- +DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu' +DATA_DIR = '../data' +OUTPUT_DIR = 'outputs' +IMAGE_SIZE = 224 +BATCH_SIZE = 8 +NUM_WORKERS = 4 +EPOCHS = 50 +LEARNING_RATE_HEAD = 1e-3 +LEARNING_RATE_FINETUNE = 3e-5 +WEIGHT_DECAY = 0.01 +MODEL_NAME = 'best_swin_model_final.pth' +NUM_CLASSES = 4 + +# --- 2. FUNGSI TRAINING & VALIDASI --- +def train_one_epoch(model, dataloader, optimizer, criterion, device): + model.train() + running_loss = 0.0 + correct_predictions = 0 + total_samples = 0 + + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Training"): + images, labels = images.to(device), labels.to(device) + optimizer.zero_grad() + outputs = model(images) + loss = criterion(outputs, labels) + loss.backward() + optimizer.step() + + running_loss += loss.item() * images.size(0) + _, preds = torch.max(outputs, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + return epoch_loss, epoch_acc + +def validate_one_epoch(model, dataloader, criterion, device): + model.eval() + running_loss = 0.0 + correct_predictions = 0 + total_samples = 0 + all_preds, all_labels = [], [] + + with torch.no_grad(): + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Validating"): + images, labels = images.to(device), labels.to(device) + outputs = model(images) + loss = criterion(outputs, labels) + + running_loss += loss.item() * images.size(0) + _, preds = torch.max(outputs, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + all_preds.extend(preds.cpu().numpy()) + all_labels.extend(labels.cpu().numpy()) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + + print("\n--- Laporan Klasifikasi Validasi ---") + print(classification_report(all_labels, 
all_preds, target_names=[str(i) for i in range(NUM_CLASSES)], zero_division=0)) + + return epoch_loss, epoch_acc, all_labels, all_preds + +# --- 3. SCRIPT UTAMA --- +if __name__ == '__main__': + _, valid_loader, classes, train_dataset = get_dataloaders(DATA_DIR, BATCH_SIZE, IMAGE_SIZE, NUM_WORKERS) + + print("\n--- Menyiapkan Balanced Sampler untuk Training ---") + class_counts = np.bincount(train_dataset.targets) + class_weights = 1. / torch.tensor(class_counts, dtype=torch.float) + sample_weights = class_weights[train_dataset.targets] + + sampler = WeightedRandomSampler(weights=sample_weights, num_samples=len(sample_weights), replacement=True) + + train_loader = DataLoader( + train_dataset, + batch_size=BATCH_SIZE, + sampler=sampler, + num_workers=NUM_WORKERS + ) + print("Balanced Sampler berhasil dibuat.") + + model = create_model(num_classes=NUM_CLASSES).to(DEVICE) + + criterion = FocalLoss().to(DEVICE) + print("Menggunakan Focal Loss.") + + # --- TAHAP 1: FREEZE BACKBONE, LATIH HEAD --- + print("\n--- TAHAP 1: Melatih Classifier Head ---") + for param in model.parameters(): + param.requires_grad = False + for param in model.head.parameters(): + param.requires_grad = True + + optimizer_head = optim.AdamW(model.head.parameters(), lr=LEARNING_RATE_HEAD, weight_decay=WEIGHT_DECAY) + + for epoch in range(5): + print(f"Epoch Head {epoch+1}/5") + train_one_epoch(model, train_loader, optimizer_head, criterion, DEVICE) + validate_one_epoch(model, valid_loader, criterion, DEVICE) + + # --- TAHAP 2: UNFREEZE & FINE-TUNE SELURUH MODEL --- + print("\n--- TAHAP 2: Fine-tuning Seluruh Model ---") + for param in model.parameters(): + param.requires_grad = True + + optimizer_finetune = optim.AdamW(model.parameters(), lr=LEARNING_RATE_FINETUNE, weight_decay=WEIGHT_DECAY) + scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer_finetune, T_max=EPOCHS, eta_min=1e-7) + + history = {'train_loss': [], 'train_acc': [], 'valid_loss': [], 'valid_acc': []} + best_valid_acc = 0.0 + 
best_labels, best_preds = None, None + + for epoch in range(EPOCHS): + print(f"Epoch {epoch+1}/{EPOCHS}") + + train_loss, train_acc = train_one_epoch(model, train_loader, optimizer_finetune, criterion, DEVICE) + valid_loss, valid_acc, valid_labels, valid_preds = validate_one_epoch(model, valid_loader, criterion, DEVICE) + + scheduler.step() + + print(f" Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}") + print(f" Valid Loss: {valid_loss:.4f}, Valid Acc: {valid_acc:.4f}") + + history['train_loss'].append(train_loss) + history['train_acc'].append(train_acc) + history['valid_loss'].append(valid_loss) + history['valid_acc'].append(valid_acc) + + if valid_acc > best_valid_acc: + print(f"Validasi akurasi meningkat dari {best_valid_acc:.4f} ke {valid_acc:.4f}. Menyimpan model...") + save_model(epoch, model, optimizer_finetune, criterion, f"{OUTPUT_DIR}/{MODEL_NAME}") + best_valid_acc = valid_acc + best_labels = valid_labels + best_preds = valid_preds + + save_plots(history['train_acc'], history['valid_acc'], history['train_loss'], history['valid_loss'], OUTPUT_DIR) + + if best_labels and best_preds: + save_confusion_matrix(best_labels, best_preds, classes, f"{OUTPUT_DIR}/confusion_matrix.png") + + print("\n--- Selesai ---") + print(f"Model terbaik disimpan di {OUTPUT_DIR}/{MODEL_NAME}") \ No newline at end of file diff --git a/.history/src/train_20250927173233.py b/.history/src/train_20250927173233.py new file mode 100644 index 0000000000000000000000000000000000000000..ab73cc53f721e0ee11fea82304caf2b6d2fa038d --- /dev/null +++ b/.history/src/train_20250927173233.py @@ -0,0 +1,161 @@ +# src/train.py + +import torch +import torch.nn as nn +import torch.optim as optim +from tqdm import tqdm +import numpy as np +from sklearn.metrics import classification_report +from torch.utils.data import DataLoader, WeightedRandomSampler + +from model import create_model +from dataset import get_dataloaders +from utils import save_model, save_plots, save_confusion_matrix, 
FocalLoss + +# --- 1. KONFIGURASI & HYPERPARAMETERS --- +DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu' +DATA_DIR = '.data' +OUTPUT_DIR = 'outputs' +IMAGE_SIZE = 224 +BATCH_SIZE = 8 +NUM_WORKERS = 4 +EPOCHS = 50 +LEARNING_RATE_HEAD = 1e-3 +LEARNING_RATE_FINETUNE = 3e-5 +WEIGHT_DECAY = 0.01 +MODEL_NAME = 'best_swin_model_final.pth' +NUM_CLASSES = 4 + +# --- 2. FUNGSI TRAINING & VALIDASI --- +def train_one_epoch(model, dataloader, optimizer, criterion, device): + model.train() + running_loss = 0.0 + correct_predictions = 0 + total_samples = 0 + + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Training"): + images, labels = images.to(device), labels.to(device) + optimizer.zero_grad() + outputs = model(images) + loss = criterion(outputs, labels) + loss.backward() + optimizer.step() + + running_loss += loss.item() * images.size(0) + _, preds = torch.max(outputs, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + return epoch_loss, epoch_acc + +def validate_one_epoch(model, dataloader, criterion, device): + model.eval() + running_loss = 0.0 + correct_predictions = 0 + total_samples = 0 + all_preds, all_labels = [], [] + + with torch.no_grad(): + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Validating"): + images, labels = images.to(device), labels.to(device) + outputs = model(images) + loss = criterion(outputs, labels) + + running_loss += loss.item() * images.size(0) + _, preds = torch.max(outputs, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + all_preds.extend(preds.cpu().numpy()) + all_labels.extend(labels.cpu().numpy()) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + + print("\n--- Laporan Klasifikasi Validasi ---") + 
print(classification_report(all_labels, all_preds, target_names=[str(i) for i in range(NUM_CLASSES)], zero_division=0)) + + return epoch_loss, epoch_acc, all_labels, all_preds + +# --- 3. SCRIPT UTAMA --- +if __name__ == '__main__': + _, valid_loader, classes, train_dataset = get_dataloaders(DATA_DIR, BATCH_SIZE, IMAGE_SIZE, NUM_WORKERS) + + print("\n--- Menyiapkan Balanced Sampler untuk Training ---") + class_counts = np.bincount(train_dataset.targets) + class_weights = 1. / torch.tensor(class_counts, dtype=torch.float) + sample_weights = class_weights[train_dataset.targets] + + sampler = WeightedRandomSampler(weights=sample_weights, num_samples=len(sample_weights), replacement=True) + + train_loader = DataLoader( + train_dataset, + batch_size=BATCH_SIZE, + sampler=sampler, + num_workers=NUM_WORKERS + ) + print("Balanced Sampler berhasil dibuat.") + + model = create_model(num_classes=NUM_CLASSES).to(DEVICE) + + criterion = FocalLoss().to(DEVICE) + print("Menggunakan Focal Loss.") + + # --- TAHAP 1: FREEZE BACKBONE, LATIH HEAD --- + print("\n--- TAHAP 1: Melatih Classifier Head ---") + for param in model.parameters(): + param.requires_grad = False + for param in model.head.parameters(): + param.requires_grad = True + + optimizer_head = optim.AdamW(model.head.parameters(), lr=LEARNING_RATE_HEAD, weight_decay=WEIGHT_DECAY) + + for epoch in range(5): + print(f"Epoch Head {epoch+1}/5") + train_one_epoch(model, train_loader, optimizer_head, criterion, DEVICE) + validate_one_epoch(model, valid_loader, criterion, DEVICE) + + # --- TAHAP 2: UNFREEZE & FINE-TUNE SELURUH MODEL --- + print("\n--- TAHAP 2: Fine-tuning Seluruh Model ---") + for param in model.parameters(): + param.requires_grad = True + + optimizer_finetune = optim.AdamW(model.parameters(), lr=LEARNING_RATE_FINETUNE, weight_decay=WEIGHT_DECAY) + scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer_finetune, T_max=EPOCHS, eta_min=1e-7) + + history = {'train_loss': [], 'train_acc': [], 'valid_loss': [], 
'valid_acc': []} + best_valid_acc = 0.0 + best_labels, best_preds = None, None + + for epoch in range(EPOCHS): + print(f"Epoch {epoch+1}/{EPOCHS}") + + train_loss, train_acc = train_one_epoch(model, train_loader, optimizer_finetune, criterion, DEVICE) + valid_loss, valid_acc, valid_labels, valid_preds = validate_one_epoch(model, valid_loader, criterion, DEVICE) + + scheduler.step() + + print(f" Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}") + print(f" Valid Loss: {valid_loss:.4f}, Valid Acc: {valid_acc:.4f}") + + history['train_loss'].append(train_loss) + history['train_acc'].append(train_acc) + history['valid_loss'].append(valid_loss) + history['valid_acc'].append(valid_acc) + + if valid_acc > best_valid_acc: + print(f"Validasi akurasi meningkat dari {best_valid_acc:.4f} ke {valid_acc:.4f}. Menyimpan model...") + save_model(epoch, model, optimizer_finetune, criterion, f"{OUTPUT_DIR}/{MODEL_NAME}") + best_valid_acc = valid_acc + best_labels = valid_labels + best_preds = valid_preds + + save_plots(history['train_acc'], history['valid_acc'], history['train_loss'], history['valid_loss'], OUTPUT_DIR) + + if best_labels and best_preds: + save_confusion_matrix(best_labels, best_preds, classes, f"{OUTPUT_DIR}/confusion_matrix.png") + + print("\n--- Selesai ---") + print(f"Model terbaik disimpan di {OUTPUT_DIR}/{MODEL_NAME}") \ No newline at end of file diff --git a/.history/src/train_20250927173240.py b/.history/src/train_20250927173240.py new file mode 100644 index 0000000000000000000000000000000000000000..bdf5d0f21579297812cca27d3d29f55ff6886162 --- /dev/null +++ b/.history/src/train_20250927173240.py @@ -0,0 +1,161 @@ +# src/train.py + +import torch +import torch.nn as nn +import torch.optim as optim +from tqdm import tqdm +import numpy as np +from sklearn.metrics import classification_report +from torch.utils.data import DataLoader, WeightedRandomSampler + +from model import create_model +from dataset import get_dataloaders +from utils import save_model, 
save_plots, save_confusion_matrix, FocalLoss + +# --- 1. KONFIGURASI & HYPERPARAMETERS --- +DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu' +DATA_DIR = '.data' +OUTPUT_DIR = 'outputs/models' +IMAGE_SIZE = 224 +BATCH_SIZE = 8 +NUM_WORKERS = 4 +EPOCHS = 50 +LEARNING_RATE_HEAD = 1e-3 +LEARNING_RATE_FINETUNE = 3e-5 +WEIGHT_DECAY = 0.01 +MODEL_NAME = 'best_swin_model_final.pth' +NUM_CLASSES = 4 + +# --- 2. FUNGSI TRAINING & VALIDASI --- +def train_one_epoch(model, dataloader, optimizer, criterion, device): + model.train() + running_loss = 0.0 + correct_predictions = 0 + total_samples = 0 + + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Training"): + images, labels = images.to(device), labels.to(device) + optimizer.zero_grad() + outputs = model(images) + loss = criterion(outputs, labels) + loss.backward() + optimizer.step() + + running_loss += loss.item() * images.size(0) + _, preds = torch.max(outputs, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + return epoch_loss, epoch_acc + +def validate_one_epoch(model, dataloader, criterion, device): + model.eval() + running_loss = 0.0 + correct_predictions = 0 + total_samples = 0 + all_preds, all_labels = [], [] + + with torch.no_grad(): + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Validating"): + images, labels = images.to(device), labels.to(device) + outputs = model(images) + loss = criterion(outputs, labels) + + running_loss += loss.item() * images.size(0) + _, preds = torch.max(outputs, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + all_preds.extend(preds.cpu().numpy()) + all_labels.extend(labels.cpu().numpy()) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + + print("\n--- Laporan Klasifikasi 
Validasi ---") + print(classification_report(all_labels, all_preds, target_names=[str(i) for i in range(NUM_CLASSES)], zero_division=0)) + + return epoch_loss, epoch_acc, all_labels, all_preds + +# --- 3. SCRIPT UTAMA --- +if __name__ == '__main__': + _, valid_loader, classes, train_dataset = get_dataloaders(DATA_DIR, BATCH_SIZE, IMAGE_SIZE, NUM_WORKERS) + + print("\n--- Menyiapkan Balanced Sampler untuk Training ---") + class_counts = np.bincount(train_dataset.targets) + class_weights = 1. / torch.tensor(class_counts, dtype=torch.float) + sample_weights = class_weights[train_dataset.targets] + + sampler = WeightedRandomSampler(weights=sample_weights, num_samples=len(sample_weights), replacement=True) + + train_loader = DataLoader( + train_dataset, + batch_size=BATCH_SIZE, + sampler=sampler, + num_workers=NUM_WORKERS + ) + print("Balanced Sampler berhasil dibuat.") + + model = create_model(num_classes=NUM_CLASSES).to(DEVICE) + + criterion = FocalLoss().to(DEVICE) + print("Menggunakan Focal Loss.") + + # --- TAHAP 1: FREEZE BACKBONE, LATIH HEAD --- + print("\n--- TAHAP 1: Melatih Classifier Head ---") + for param in model.parameters(): + param.requires_grad = False + for param in model.head.parameters(): + param.requires_grad = True + + optimizer_head = optim.AdamW(model.head.parameters(), lr=LEARNING_RATE_HEAD, weight_decay=WEIGHT_DECAY) + + for epoch in range(5): + print(f"Epoch Head {epoch+1}/5") + train_one_epoch(model, train_loader, optimizer_head, criterion, DEVICE) + validate_one_epoch(model, valid_loader, criterion, DEVICE) + + # --- TAHAP 2: UNFREEZE & FINE-TUNE SELURUH MODEL --- + print("\n--- TAHAP 2: Fine-tuning Seluruh Model ---") + for param in model.parameters(): + param.requires_grad = True + + optimizer_finetune = optim.AdamW(model.parameters(), lr=LEARNING_RATE_FINETUNE, weight_decay=WEIGHT_DECAY) + scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer_finetune, T_max=EPOCHS, eta_min=1e-7) + + history = {'train_loss': [], 'train_acc': [], 
'valid_loss': [], 'valid_acc': []} + best_valid_acc = 0.0 + best_labels, best_preds = None, None + + for epoch in range(EPOCHS): + print(f"Epoch {epoch+1}/{EPOCHS}") + + train_loss, train_acc = train_one_epoch(model, train_loader, optimizer_finetune, criterion, DEVICE) + valid_loss, valid_acc, valid_labels, valid_preds = validate_one_epoch(model, valid_loader, criterion, DEVICE) + + scheduler.step() + + print(f" Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}") + print(f" Valid Loss: {valid_loss:.4f}, Valid Acc: {valid_acc:.4f}") + + history['train_loss'].append(train_loss) + history['train_acc'].append(train_acc) + history['valid_loss'].append(valid_loss) + history['valid_acc'].append(valid_acc) + + if valid_acc > best_valid_acc: + print(f"Validasi akurasi meningkat dari {best_valid_acc:.4f} ke {valid_acc:.4f}. Menyimpan model...") + save_model(epoch, model, optimizer_finetune, criterion, f"{OUTPUT_DIR}/{MODEL_NAME}") + best_valid_acc = valid_acc + best_labels = valid_labels + best_preds = valid_preds + + save_plots(history['train_acc'], history['valid_acc'], history['train_loss'], history['valid_loss'], OUTPUT_DIR) + + if best_labels and best_preds: + save_confusion_matrix(best_labels, best_preds, classes, f"{OUTPUT_DIR}/confusion_matrix.png") + + print("\n--- Selesai ---") + print(f"Model terbaik disimpan di {OUTPUT_DIR}/{MODEL_NAME}") \ No newline at end of file diff --git a/.history/src/train_20250927174330.py b/.history/src/train_20250927174330.py new file mode 100644 index 0000000000000000000000000000000000000000..67af174818a1da9d09ae0424f1b9eac7b867cc97 --- /dev/null +++ b/.history/src/train_20250927174330.py @@ -0,0 +1,161 @@ +# src/train.py + +import torch +import torch.nn as nn +import torch.optim as optim +from tqdm import tqdm +import numpy as np +from sklearn.metrics import classification_report +from torch.utils.data import DataLoader, WeightedRandomSampler + +from model import create_model +from dataset import get_dataloaders +from utils 
import save_model, save_plots, save_confusion_matrix, FocalLoss + +# --- 1. KONFIGURASI & HYPERPARAMETERS --- +DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu' +DATA_DIR = 'data' +OUTPUT_DIR = 'outputs/models' +IMAGE_SIZE = 224 +BATCH_SIZE = 8 +NUM_WORKERS = 4 +EPOCHS = 50 +LEARNING_RATE_HEAD = 1e-3 +LEARNING_RATE_FINETUNE = 3e-5 +WEIGHT_DECAY = 0.01 +MODEL_NAME = 'best_swin_model_final.pth' +NUM_CLASSES = 4 + +# --- 2. FUNGSI TRAINING & VALIDASI --- +def train_one_epoch(model, dataloader, optimizer, criterion, device): + model.train() + running_loss = 0.0 + correct_predictions = 0 + total_samples = 0 + + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Training"): + images, labels = images.to(device), labels.to(device) + optimizer.zero_grad() + outputs = model(images) + loss = criterion(outputs, labels) + loss.backward() + optimizer.step() + + running_loss += loss.item() * images.size(0) + _, preds = torch.max(outputs, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + return epoch_loss, epoch_acc + +def validate_one_epoch(model, dataloader, criterion, device): + model.eval() + running_loss = 0.0 + correct_predictions = 0 + total_samples = 0 + all_preds, all_labels = [], [] + + with torch.no_grad(): + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Validating"): + images, labels = images.to(device), labels.to(device) + outputs = model(images) + loss = criterion(outputs, labels) + + running_loss += loss.item() * images.size(0) + _, preds = torch.max(outputs, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + all_preds.extend(preds.cpu().numpy()) + all_labels.extend(labels.cpu().numpy()) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + + print("\n--- 
Laporan Klasifikasi Validasi ---") + print(classification_report(all_labels, all_preds, target_names=[str(i) for i in range(NUM_CLASSES)], zero_division=0)) + + return epoch_loss, epoch_acc, all_labels, all_preds + +# --- 3. SCRIPT UTAMA --- +if __name__ == '__main__': + _, valid_loader, classes, train_dataset = get_dataloaders(DATA_DIR, BATCH_SIZE, IMAGE_SIZE, NUM_WORKERS) + + print("\n--- Menyiapkan Balanced Sampler untuk Training ---") + class_counts = np.bincount(train_dataset.targets) + class_weights = 1. / torch.tensor(class_counts, dtype=torch.float) + sample_weights = class_weights[train_dataset.targets] + + sampler = WeightedRandomSampler(weights=sample_weights, num_samples=len(sample_weights), replacement=True) + + train_loader = DataLoader( + train_dataset, + batch_size=BATCH_SIZE, + sampler=sampler, + num_workers=NUM_WORKERS + ) + print("Balanced Sampler berhasil dibuat.") + + model = create_model(num_classes=NUM_CLASSES).to(DEVICE) + + criterion = FocalLoss().to(DEVICE) + print("Menggunakan Focal Loss.") + + # --- TAHAP 1: FREEZE BACKBONE, LATIH HEAD --- + print("\n--- TAHAP 1: Melatih Classifier Head ---") + for param in model.parameters(): + param.requires_grad = False + for param in model.head.parameters(): + param.requires_grad = True + + optimizer_head = optim.AdamW(model.head.parameters(), lr=LEARNING_RATE_HEAD, weight_decay=WEIGHT_DECAY) + + for epoch in range(5): + print(f"Epoch Head {epoch+1}/5") + train_one_epoch(model, train_loader, optimizer_head, criterion, DEVICE) + validate_one_epoch(model, valid_loader, criterion, DEVICE) + + # --- TAHAP 2: UNFREEZE & FINE-TUNE SELURUH MODEL --- + print("\n--- TAHAP 2: Fine-tuning Seluruh Model ---") + for param in model.parameters(): + param.requires_grad = True + + optimizer_finetune = optim.AdamW(model.parameters(), lr=LEARNING_RATE_FINETUNE, weight_decay=WEIGHT_DECAY) + scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer_finetune, T_max=EPOCHS, eta_min=1e-7) + + history = {'train_loss': [], 
'train_acc': [], 'valid_loss': [], 'valid_acc': []} + best_valid_acc = 0.0 + best_labels, best_preds = None, None + + for epoch in range(EPOCHS): + print(f"Epoch {epoch+1}/{EPOCHS}") + + train_loss, train_acc = train_one_epoch(model, train_loader, optimizer_finetune, criterion, DEVICE) + valid_loss, valid_acc, valid_labels, valid_preds = validate_one_epoch(model, valid_loader, criterion, DEVICE) + + scheduler.step() + + print(f" Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}") + print(f" Valid Loss: {valid_loss:.4f}, Valid Acc: {valid_acc:.4f}") + + history['train_loss'].append(train_loss) + history['train_acc'].append(train_acc) + history['valid_loss'].append(valid_loss) + history['valid_acc'].append(valid_acc) + + if valid_acc > best_valid_acc: + print(f"Validasi akurasi meningkat dari {best_valid_acc:.4f} ke {valid_acc:.4f}. Menyimpan model...") + save_model(epoch, model, optimizer_finetune, criterion, f"{OUTPUT_DIR}/{MODEL_NAME}") + best_valid_acc = valid_acc + best_labels = valid_labels + best_preds = valid_preds + + save_plots(history['train_acc'], history['valid_acc'], history['train_loss'], history['valid_loss'], OUTPUT_DIR) + + if best_labels and best_preds: + save_confusion_matrix(best_labels, best_preds, classes, f"{OUTPUT_DIR}/confusion_matrix.png") + + print("\n--- Selesai ---") + print(f"Model terbaik disimpan di {OUTPUT_DIR}/{MODEL_NAME}") \ No newline at end of file diff --git a/.history/src/train_20250927204808.py b/.history/src/train_20250927204808.py new file mode 100644 index 0000000000000000000000000000000000000000..641a887be9eb5b07faf5a5439690108fcce90060 --- /dev/null +++ b/.history/src/train_20250927204808.py @@ -0,0 +1,161 @@ +# src/train.py + +import torch +import torch.nn as nn +import torch.optim as optim +from tqdm import tqdm +import numpy as np +from sklearn.metrics import classification_report +from torch.utils.data import DataLoader, WeightedRandomSampler + +from model import create_model +from dataset import 
get_dataloaders +from utils import save_model, save_plots, save_confusion_matrix, FocalLoss + +# --- 1. KONFIGURASI & HYPERPARAMETERS --- +DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu' +DATA_DIR = 'data' +OUTPUT_DIR = 'outputs/models' +IMAGE_SIZE = 224 +BATCH_SIZE = 16 +NUM_WORKERS = 4 +EPOCHS = 50 +LEARNING_RATE_HEAD = 1e-3 +LEARNING_RATE_FINETUNE = 3e-5 +WEIGHT_DECAY = 0.01 +MODEL_NAME = 'best_swin_model_final.pth' +NUM_CLASSES = 4 + +# --- 2. FUNGSI TRAINING & VALIDASI --- +def train_one_epoch(model, dataloader, optimizer, criterion, device): + model.train() + running_loss = 0.0 + correct_predictions = 0 + total_samples = 0 + + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Training"): + images, labels = images.to(device), labels.to(device) + optimizer.zero_grad() + outputs = model(images) + loss = criterion(outputs, labels) + loss.backward() + optimizer.step() + + running_loss += loss.item() * images.size(0) + _, preds = torch.max(outputs, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + return epoch_loss, epoch_acc + +def validate_one_epoch(model, dataloader, criterion, device): + model.eval() + running_loss = 0.0 + correct_predictions = 0 + total_samples = 0 + all_preds, all_labels = [], [] + + with torch.no_grad(): + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Validating"): + images, labels = images.to(device), labels.to(device) + outputs = model(images) + loss = criterion(outputs, labels) + + running_loss += loss.item() * images.size(0) + _, preds = torch.max(outputs, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + all_preds.extend(preds.cpu().numpy()) + all_labels.extend(labels.cpu().numpy()) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / 
total_samples).item() + + print("\n--- Laporan Klasifikasi Validasi ---") + print(classification_report(all_labels, all_preds, target_names=[str(i) for i in range(NUM_CLASSES)], zero_division=0)) + + return epoch_loss, epoch_acc, all_labels, all_preds + +# --- 3. SCRIPT UTAMA --- +if __name__ == '__main__': + _, valid_loader, classes, train_dataset = get_dataloaders(DATA_DIR, BATCH_SIZE, IMAGE_SIZE, NUM_WORKERS) + + print("\n--- Menyiapkan Balanced Sampler untuk Training ---") + class_counts = np.bincount(train_dataset.targets) + class_weights = 1. / torch.tensor(class_counts, dtype=torch.float) + sample_weights = class_weights[train_dataset.targets] + + sampler = WeightedRandomSampler(weights=sample_weights, num_samples=len(sample_weights), replacement=True) + + train_loader = DataLoader( + train_dataset, + batch_size=BATCH_SIZE, + sampler=sampler, + num_workers=NUM_WORKERS + ) + print("Balanced Sampler berhasil dibuat.") + + model = create_model(num_classes=NUM_CLASSES).to(DEVICE) + + criterion = FocalLoss().to(DEVICE) + print("Menggunakan Focal Loss.") + + # --- TAHAP 1: FREEZE BACKBONE, LATIH HEAD --- + print("\n--- TAHAP 1: Melatih Classifier Head ---") + for param in model.parameters(): + param.requires_grad = False + for param in model.head.parameters(): + param.requires_grad = True + + optimizer_head = optim.AdamW(model.head.parameters(), lr=LEARNING_RATE_HEAD, weight_decay=WEIGHT_DECAY) + + for epoch in range(5): + print(f"Epoch Head {epoch+1}/5") + train_one_epoch(model, train_loader, optimizer_head, criterion, DEVICE) + validate_one_epoch(model, valid_loader, criterion, DEVICE) + + # --- TAHAP 2: UNFREEZE & FINE-TUNE SELURUH MODEL --- + print("\n--- TAHAP 2: Fine-tuning Seluruh Model ---") + for param in model.parameters(): + param.requires_grad = True + + optimizer_finetune = optim.AdamW(model.parameters(), lr=LEARNING_RATE_FINETUNE, weight_decay=WEIGHT_DECAY) + scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer_finetune, T_max=EPOCHS, 
eta_min=1e-7) + + history = {'train_loss': [], 'train_acc': [], 'valid_loss': [], 'valid_acc': []} + best_valid_acc = 0.0 + best_labels, best_preds = None, None + + for epoch in range(EPOCHS): + print(f"Epoch {epoch+1}/{EPOCHS}") + + train_loss, train_acc = train_one_epoch(model, train_loader, optimizer_finetune, criterion, DEVICE) + valid_loss, valid_acc, valid_labels, valid_preds = validate_one_epoch(model, valid_loader, criterion, DEVICE) + + scheduler.step() + + print(f" Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}") + print(f" Valid Loss: {valid_loss:.4f}, Valid Acc: {valid_acc:.4f}") + + history['train_loss'].append(train_loss) + history['train_acc'].append(train_acc) + history['valid_loss'].append(valid_loss) + history['valid_acc'].append(valid_acc) + + if valid_acc > best_valid_acc: + print(f"Validasi akurasi meningkat dari {best_valid_acc:.4f} ke {valid_acc:.4f}. Menyimpan model...") + save_model(epoch, model, optimizer_finetune, criterion, f"{OUTPUT_DIR}/{MODEL_NAME}") + best_valid_acc = valid_acc + best_labels = valid_labels + best_preds = valid_preds + + save_plots(history['train_acc'], history['valid_acc'], history['train_loss'], history['valid_loss'], OUTPUT_DIR) + + if best_labels and best_preds: + save_confusion_matrix(best_labels, best_preds, classes, f"{OUTPUT_DIR}/confusion_matrix.png") + + print("\n--- Selesai ---") + print(f"Model terbaik disimpan di {OUTPUT_DIR}/{MODEL_NAME}") \ No newline at end of file diff --git a/.history/src/train_20250927204953.py b/.history/src/train_20250927204953.py new file mode 100644 index 0000000000000000000000000000000000000000..641a887be9eb5b07faf5a5439690108fcce90060 --- /dev/null +++ b/.history/src/train_20250927204953.py @@ -0,0 +1,161 @@ +# src/train.py + +import torch +import torch.nn as nn +import torch.optim as optim +from tqdm import tqdm +import numpy as np +from sklearn.metrics import classification_report +from torch.utils.data import DataLoader, WeightedRandomSampler + +from model import 
create_model +from dataset import get_dataloaders +from utils import save_model, save_plots, save_confusion_matrix, FocalLoss + +# --- 1. KONFIGURASI & HYPERPARAMETERS --- +DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu' +DATA_DIR = 'data' +OUTPUT_DIR = 'outputs/models' +IMAGE_SIZE = 224 +BATCH_SIZE = 16 +NUM_WORKERS = 4 +EPOCHS = 50 +LEARNING_RATE_HEAD = 1e-3 +LEARNING_RATE_FINETUNE = 3e-5 +WEIGHT_DECAY = 0.01 +MODEL_NAME = 'best_swin_model_final.pth' +NUM_CLASSES = 4 + +# --- 2. FUNGSI TRAINING & VALIDASI --- +def train_one_epoch(model, dataloader, optimizer, criterion, device): + model.train() + running_loss = 0.0 + correct_predictions = 0 + total_samples = 0 + + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Training"): + images, labels = images.to(device), labels.to(device) + optimizer.zero_grad() + outputs = model(images) + loss = criterion(outputs, labels) + loss.backward() + optimizer.step() + + running_loss += loss.item() * images.size(0) + _, preds = torch.max(outputs, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + return epoch_loss, epoch_acc + +def validate_one_epoch(model, dataloader, criterion, device): + model.eval() + running_loss = 0.0 + correct_predictions = 0 + total_samples = 0 + all_preds, all_labels = [], [] + + with torch.no_grad(): + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Validating"): + images, labels = images.to(device), labels.to(device) + outputs = model(images) + loss = criterion(outputs, labels) + + running_loss += loss.item() * images.size(0) + _, preds = torch.max(outputs, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + all_preds.extend(preds.cpu().numpy()) + all_labels.extend(labels.cpu().numpy()) + + epoch_loss = running_loss / total_samples + epoch_acc = 
(correct_predictions.double() / total_samples).item() + + print("\n--- Laporan Klasifikasi Validasi ---") + print(classification_report(all_labels, all_preds, target_names=[str(i) for i in range(NUM_CLASSES)], zero_division=0)) + + return epoch_loss, epoch_acc, all_labels, all_preds + +# --- 3. SCRIPT UTAMA --- +if __name__ == '__main__': + _, valid_loader, classes, train_dataset = get_dataloaders(DATA_DIR, BATCH_SIZE, IMAGE_SIZE, NUM_WORKERS) + + print("\n--- Menyiapkan Balanced Sampler untuk Training ---") + class_counts = np.bincount(train_dataset.targets) + class_weights = 1. / torch.tensor(class_counts, dtype=torch.float) + sample_weights = class_weights[train_dataset.targets] + + sampler = WeightedRandomSampler(weights=sample_weights, num_samples=len(sample_weights), replacement=True) + + train_loader = DataLoader( + train_dataset, + batch_size=BATCH_SIZE, + sampler=sampler, + num_workers=NUM_WORKERS + ) + print("Balanced Sampler berhasil dibuat.") + + model = create_model(num_classes=NUM_CLASSES).to(DEVICE) + + criterion = FocalLoss().to(DEVICE) + print("Menggunakan Focal Loss.") + + # --- TAHAP 1: FREEZE BACKBONE, LATIH HEAD --- + print("\n--- TAHAP 1: Melatih Classifier Head ---") + for param in model.parameters(): + param.requires_grad = False + for param in model.head.parameters(): + param.requires_grad = True + + optimizer_head = optim.AdamW(model.head.parameters(), lr=LEARNING_RATE_HEAD, weight_decay=WEIGHT_DECAY) + + for epoch in range(5): + print(f"Epoch Head {epoch+1}/5") + train_one_epoch(model, train_loader, optimizer_head, criterion, DEVICE) + validate_one_epoch(model, valid_loader, criterion, DEVICE) + + # --- TAHAP 2: UNFREEZE & FINE-TUNE SELURUH MODEL --- + print("\n--- TAHAP 2: Fine-tuning Seluruh Model ---") + for param in model.parameters(): + param.requires_grad = True + + optimizer_finetune = optim.AdamW(model.parameters(), lr=LEARNING_RATE_FINETUNE, weight_decay=WEIGHT_DECAY) + scheduler = 
optim.lr_scheduler.CosineAnnealingLR(optimizer_finetune, T_max=EPOCHS, eta_min=1e-7) + + history = {'train_loss': [], 'train_acc': [], 'valid_loss': [], 'valid_acc': []} + best_valid_acc = 0.0 + best_labels, best_preds = None, None + + for epoch in range(EPOCHS): + print(f"Epoch {epoch+1}/{EPOCHS}") + + train_loss, train_acc = train_one_epoch(model, train_loader, optimizer_finetune, criterion, DEVICE) + valid_loss, valid_acc, valid_labels, valid_preds = validate_one_epoch(model, valid_loader, criterion, DEVICE) + + scheduler.step() + + print(f" Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}") + print(f" Valid Loss: {valid_loss:.4f}, Valid Acc: {valid_acc:.4f}") + + history['train_loss'].append(train_loss) + history['train_acc'].append(train_acc) + history['valid_loss'].append(valid_loss) + history['valid_acc'].append(valid_acc) + + if valid_acc > best_valid_acc: + print(f"Validasi akurasi meningkat dari {best_valid_acc:.4f} ke {valid_acc:.4f}. Menyimpan model...") + save_model(epoch, model, optimizer_finetune, criterion, f"{OUTPUT_DIR}/{MODEL_NAME}") + best_valid_acc = valid_acc + best_labels = valid_labels + best_preds = valid_preds + + save_plots(history['train_acc'], history['valid_acc'], history['train_loss'], history['valid_loss'], OUTPUT_DIR) + + if best_labels and best_preds: + save_confusion_matrix(best_labels, best_preds, classes, f"{OUTPUT_DIR}/confusion_matrix.png") + + print("\n--- Selesai ---") + print(f"Model terbaik disimpan di {OUTPUT_DIR}/{MODEL_NAME}") \ No newline at end of file diff --git a/.history/src/train_20250927204955.py b/.history/src/train_20250927204955.py new file mode 100644 index 0000000000000000000000000000000000000000..641a887be9eb5b07faf5a5439690108fcce90060 --- /dev/null +++ b/.history/src/train_20250927204955.py @@ -0,0 +1,161 @@ +# src/train.py + +import torch +import torch.nn as nn +import torch.optim as optim +from tqdm import tqdm +import numpy as np +from sklearn.metrics import classification_report +from 
torch.utils.data import DataLoader, WeightedRandomSampler + +from model import create_model +from dataset import get_dataloaders +from utils import save_model, save_plots, save_confusion_matrix, FocalLoss + +# --- 1. KONFIGURASI & HYPERPARAMETERS --- +DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu' +DATA_DIR = 'data' +OUTPUT_DIR = 'outputs/models' +IMAGE_SIZE = 224 +BATCH_SIZE = 16 +NUM_WORKERS = 4 +EPOCHS = 50 +LEARNING_RATE_HEAD = 1e-3 +LEARNING_RATE_FINETUNE = 3e-5 +WEIGHT_DECAY = 0.01 +MODEL_NAME = 'best_swin_model_final.pth' +NUM_CLASSES = 4 + +# --- 2. FUNGSI TRAINING & VALIDASI --- +def train_one_epoch(model, dataloader, optimizer, criterion, device): + model.train() + running_loss = 0.0 + correct_predictions = 0 + total_samples = 0 + + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Training"): + images, labels = images.to(device), labels.to(device) + optimizer.zero_grad() + outputs = model(images) + loss = criterion(outputs, labels) + loss.backward() + optimizer.step() + + running_loss += loss.item() * images.size(0) + _, preds = torch.max(outputs, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + return epoch_loss, epoch_acc + +def validate_one_epoch(model, dataloader, criterion, device): + model.eval() + running_loss = 0.0 + correct_predictions = 0 + total_samples = 0 + all_preds, all_labels = [], [] + + with torch.no_grad(): + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Validating"): + images, labels = images.to(device), labels.to(device) + outputs = model(images) + loss = criterion(outputs, labels) + + running_loss += loss.item() * images.size(0) + _, preds = torch.max(outputs, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + all_preds.extend(preds.cpu().numpy()) + 
all_labels.extend(labels.cpu().numpy()) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + + print("\n--- Laporan Klasifikasi Validasi ---") + print(classification_report(all_labels, all_preds, target_names=[str(i) for i in range(NUM_CLASSES)], zero_division=0)) + + return epoch_loss, epoch_acc, all_labels, all_preds + +# --- 3. SCRIPT UTAMA --- +if __name__ == '__main__': + _, valid_loader, classes, train_dataset = get_dataloaders(DATA_DIR, BATCH_SIZE, IMAGE_SIZE, NUM_WORKERS) + + print("\n--- Menyiapkan Balanced Sampler untuk Training ---") + class_counts = np.bincount(train_dataset.targets) + class_weights = 1. / torch.tensor(class_counts, dtype=torch.float) + sample_weights = class_weights[train_dataset.targets] + + sampler = WeightedRandomSampler(weights=sample_weights, num_samples=len(sample_weights), replacement=True) + + train_loader = DataLoader( + train_dataset, + batch_size=BATCH_SIZE, + sampler=sampler, + num_workers=NUM_WORKERS + ) + print("Balanced Sampler berhasil dibuat.") + + model = create_model(num_classes=NUM_CLASSES).to(DEVICE) + + criterion = FocalLoss().to(DEVICE) + print("Menggunakan Focal Loss.") + + # --- TAHAP 1: FREEZE BACKBONE, LATIH HEAD --- + print("\n--- TAHAP 1: Melatih Classifier Head ---") + for param in model.parameters(): + param.requires_grad = False + for param in model.head.parameters(): + param.requires_grad = True + + optimizer_head = optim.AdamW(model.head.parameters(), lr=LEARNING_RATE_HEAD, weight_decay=WEIGHT_DECAY) + + for epoch in range(5): + print(f"Epoch Head {epoch+1}/5") + train_one_epoch(model, train_loader, optimizer_head, criterion, DEVICE) + validate_one_epoch(model, valid_loader, criterion, DEVICE) + + # --- TAHAP 2: UNFREEZE & FINE-TUNE SELURUH MODEL --- + print("\n--- TAHAP 2: Fine-tuning Seluruh Model ---") + for param in model.parameters(): + param.requires_grad = True + + optimizer_finetune = optim.AdamW(model.parameters(), 
lr=LEARNING_RATE_FINETUNE, weight_decay=WEIGHT_DECAY) + scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer_finetune, T_max=EPOCHS, eta_min=1e-7) + + history = {'train_loss': [], 'train_acc': [], 'valid_loss': [], 'valid_acc': []} + best_valid_acc = 0.0 + best_labels, best_preds = None, None + + for epoch in range(EPOCHS): + print(f"Epoch {epoch+1}/{EPOCHS}") + + train_loss, train_acc = train_one_epoch(model, train_loader, optimizer_finetune, criterion, DEVICE) + valid_loss, valid_acc, valid_labels, valid_preds = validate_one_epoch(model, valid_loader, criterion, DEVICE) + + scheduler.step() + + print(f" Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}") + print(f" Valid Loss: {valid_loss:.4f}, Valid Acc: {valid_acc:.4f}") + + history['train_loss'].append(train_loss) + history['train_acc'].append(train_acc) + history['valid_loss'].append(valid_loss) + history['valid_acc'].append(valid_acc) + + if valid_acc > best_valid_acc: + print(f"Validasi akurasi meningkat dari {best_valid_acc:.4f} ke {valid_acc:.4f}. 
Menyimpan model...") + save_model(epoch, model, optimizer_finetune, criterion, f"{OUTPUT_DIR}/{MODEL_NAME}") + best_valid_acc = valid_acc + best_labels = valid_labels + best_preds = valid_preds + + save_plots(history['train_acc'], history['valid_acc'], history['train_loss'], history['valid_loss'], OUTPUT_DIR) + + if best_labels and best_preds: + save_confusion_matrix(best_labels, best_preds, classes, f"{OUTPUT_DIR}/confusion_matrix.png") + + print("\n--- Selesai ---") + print(f"Model terbaik disimpan di {OUTPUT_DIR}/{MODEL_NAME}") \ No newline at end of file diff --git a/.history/src/train_20250928065131.py b/.history/src/train_20250928065131.py new file mode 100644 index 0000000000000000000000000000000000000000..d5788d1c73f4c514583645c350584fb0556a67c4 --- /dev/null +++ b/.history/src/train_20250928065131.py @@ -0,0 +1,161 @@ +# src/train.py + +import torch +import torch.nn as nn +import torch.optim as optim +from tqdm import tqdm +import numpy as np +from sklearn.metrics import classification_report +from torch.utils.data import DataLoader, WeightedRandomSampler + +from model import create_model +from dataset import get_dataloaders +from utils import save_model, save_plots, save_confusion_matrix, FocalLoss + +# --- 1. KONFIGURASI & HYPERPARAMETERS --- +DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu' +DATA_DIR = 'data' +OUTPUT_DIR = 'outputs/models' +IMAGE_SIZE = 224 +BATCH_SIZE = 16 +NUM_WORKERS = 4 +EPOCHS = 50 +LEARNING_RATE_HEAD = 1e-3 +LEARNING_RATE_FINETUNE = 3e-5 +WEIGHT_DECAY = 0.01 +MODEL_NAME = 'best_swin_model_final.pth' +NUM_CLASSES = 4 + +# --- 2. 
FUNGSI TRAINING & VALIDASI --- +def train_one_epoch(model, dataloader, optimizer, criterion, device): + model.train() + running_loss = 0.0 + correct_predictions = 0 + total_samples = 0 + + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Training"): + images, labels = images.to(device), labels.to(device) + optimizer.zero_grad() + outputs = model(images) + loss = criterion(outputs, labels) + loss.backward() + optimizer.step() + + running_loss += loss.item() * images.size(0) + _, preds = torch.max(outputs, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + return epoch_loss, epoch_acc + +def validate_one_epoch(model, dataloader, criterion, device): + model.eval() + running_loss = 0.0 + correct_predictions = 0 + total_samples = 0 + all_preds, all_labels = [], [] + + with torch.no_grad(): + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Validating"): + images, labels = images.to(device), labels.to(device) + outputs = model(images) + loss = criterion(outputs, labels) + + running_loss += loss.item() * images.size(0) + _, preds = torch.max(outputs, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + all_preds.extend(preds.cpu().numpy()) + all_labels.extend(labels.cpu().numpy()) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + + print("\n--- Laporan Klasifikasi Validasi ---") + print(classification_report(all_labels, all_preds, target_names=[str(i) for i in range(NUM_CLASSES)], zero_division=0)) + + return epoch_loss, epoch_acc, all_labels, all_preds + +# --- 3. 
SCRIPT UTAMA --- +if __name__ == '__main__': + _, valid_loader, classes, train_dataset = get_dataloaders(DATA_DIR, BATCH_SIZE, IMAGE_SIZE, NUM_WORKERS) + + print("\n--- Menyiapkan Balanced Sampler untuk Training ---") + class_counts = np.bincount(train_dataset.targets) + class_weights = 1. / torch.tensor(class_counts, dtype=torch.float) + sample_weights = class_weights[train_dataset.targets] + + sampler = WeightedRandomSampler(weights=sample_weights, num_samples=len(sample_weights), replacement=True) + + train_loader = DataLoader( + train_dataset, + batch_size=BATCH_SIZE, + sampler=sampler, + num_workers=NUM_WORKERS + ) + print("Balanced Sampler berhasil dibuat.") + + model = create_model(num_classes=NUM_CLASSES).to(DEVICE) + + criterion = nn.CrossEntropyLoss(label_smoothing=0.1).to(DEVICE) + print("Menggunakan Focal Loss.") + + # --- TAHAP 1: FREEZE BACKBONE, LATIH HEAD --- + print("\n--- TAHAP 1: Melatih Classifier Head ---") + for param in model.parameters(): + param.requires_grad = False + for param in model.head.parameters(): + param.requires_grad = True + + optimizer_head = optim.AdamW(model.head.parameters(), lr=LEARNING_RATE_HEAD, weight_decay=WEIGHT_DECAY) + + for epoch in range(5): + print(f"Epoch Head {epoch+1}/5") + train_one_epoch(model, train_loader, optimizer_head, criterion, DEVICE) + validate_one_epoch(model, valid_loader, criterion, DEVICE) + + # --- TAHAP 2: UNFREEZE & FINE-TUNE SELURUH MODEL --- + print("\n--- TAHAP 2: Fine-tuning Seluruh Model ---") + for param in model.parameters(): + param.requires_grad = True + + optimizer_finetune = optim.AdamW(model.parameters(), lr=LEARNING_RATE_FINETUNE, weight_decay=WEIGHT_DECAY) + scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer_finetune, T_max=EPOCHS, eta_min=1e-7) + + history = {'train_loss': [], 'train_acc': [], 'valid_loss': [], 'valid_acc': []} + best_valid_acc = 0.0 + best_labels, best_preds = None, None + + for epoch in range(EPOCHS): + print(f"Epoch {epoch+1}/{EPOCHS}") + + 
train_loss, train_acc = train_one_epoch(model, train_loader, optimizer_finetune, criterion, DEVICE) + valid_loss, valid_acc, valid_labels, valid_preds = validate_one_epoch(model, valid_loader, criterion, DEVICE) + + scheduler.step() + + print(f" Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}") + print(f" Valid Loss: {valid_loss:.4f}, Valid Acc: {valid_acc:.4f}") + + history['train_loss'].append(train_loss) + history['train_acc'].append(train_acc) + history['valid_loss'].append(valid_loss) + history['valid_acc'].append(valid_acc) + + if valid_acc > best_valid_acc: + print(f"Validasi akurasi meningkat dari {best_valid_acc:.4f} ke {valid_acc:.4f}. Menyimpan model...") + save_model(epoch, model, optimizer_finetune, criterion, f"{OUTPUT_DIR}/{MODEL_NAME}") + best_valid_acc = valid_acc + best_labels = valid_labels + best_preds = valid_preds + + save_plots(history['train_acc'], history['valid_acc'], history['train_loss'], history['valid_loss'], OUTPUT_DIR) + + if best_labels and best_preds: + save_confusion_matrix(best_labels, best_preds, classes, f"{OUTPUT_DIR}/confusion_matrix.png") + + print("\n--- Selesai ---") + print(f"Model terbaik disimpan di {OUTPUT_DIR}/{MODEL_NAME}") \ No newline at end of file diff --git a/.history/src/train_20250928065215.py b/.history/src/train_20250928065215.py new file mode 100644 index 0000000000000000000000000000000000000000..a41e003ad6d92b023f042c1466ee0e65cdb10a1c --- /dev/null +++ b/.history/src/train_20250928065215.py @@ -0,0 +1,162 @@ +# src/train.py + +import torch +import torch.nn as nn +import torch.optim as optim +from tqdm import tqdm +import numpy as np +from sklearn.metrics import classification_report +from torch.utils.data import DataLoader, WeightedRandomSampler + +from model import create_model +from dataset import get_dataloaders +from utils import save_model, save_plots, save_confusion_matrix, FocalLoss + +# --- 1. 
KONFIGURASI & HYPERPARAMETERS --- +DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu' +DATA_DIR = 'data' +OUTPUT_DIR = 'outputs/models' +IMAGE_SIZE = 224 +BATCH_SIZE = 16 +NUM_WORKERS = 4 +EPOCHS = 50 +LEARNING_RATE_HEAD = 1e-3 +LEARNING_RATE_FINETUNE = 3e-5 +WEIGHT_DECAY = 0.01 +MODEL_NAME = 'best_swin_model_final.pth' +NUM_CLASSES = 4 + +# --- 2. FUNGSI TRAINING & VALIDASI --- +def train_one_epoch(model, dataloader, optimizer, criterion, device): + model.train() + running_loss = 0.0 + correct_predictions = 0 + total_samples = 0 + + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Training"): + images, labels = images.to(device), labels.to(device) + optimizer.zero_grad() + outputs = model(images) + loss = criterion(outputs, labels) + loss.backward() + optimizer.step() + + running_loss += loss.item() * images.size(0) + _, preds = torch.max(outputs, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + return epoch_loss, epoch_acc + +def validate_one_epoch(model, dataloader, criterion, device): + model.eval() + running_loss = 0.0 + correct_predictions = 0 + total_samples = 0 + all_preds, all_labels = [], [] + + with torch.no_grad(): + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Validating"): + images, labels = images.to(device), labels.to(device) + outputs = model(images) + loss = criterion(outputs, labels) + + running_loss += loss.item() * images.size(0) + _, preds = torch.max(outputs, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + all_preds.extend(preds.cpu().numpy()) + all_labels.extend(labels.cpu().numpy()) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + + print("\n--- Laporan Klasifikasi Validasi ---") + print(classification_report(all_labels, 
all_preds, target_names=[str(i) for i in range(NUM_CLASSES)], zero_division=0)) + + return epoch_loss, epoch_acc, all_labels, all_preds + +# --- 3. SCRIPT UTAMA --- +if __name__ == '__main__': + _, valid_loader, classes, train_dataset = get_dataloaders(DATA_DIR, BATCH_SIZE, IMAGE_SIZE, NUM_WORKERS) + + print("\n--- Menyiapkan Balanced Sampler untuk Training ---") + class_counts = np.bincount(train_dataset.targets) + class_weights = 1. / torch.tensor(class_counts, dtype=torch.float) + sample_weights = class_weights[train_dataset.targets] + + sampler = WeightedRandomSampler(weights=sample_weights, num_samples=len(sample_weights), replacement=True) + + train_loader = DataLoader( + train_dataset, + batch_size=BATCH_SIZE, + sampler=sampler, + num_workers=NUM_WORKERS + ) + print("Balanced Sampler berhasil dibuat.") + + model = create_model(num_classes=NUM_CLASSES).to(DEVICE) + + criterion = nn.CrossEntropyLoss(label_smoothing=0.1).to(DEVICE) + print("Menggunakan Focal Loss.") + + # --- TAHAP 1: FREEZE BACKBONE, LATIH HEAD --- + print("\n--- TAHAP 1: Melatih Classifier Head ---") + for param in model.parameters(): + param.requires_grad = False + for param in model.head.parameters(): + param.requires_grad = True + + optimizer_head = optim.AdamW(model.head.parameters(), lr=LEARNING_RATE_HEAD, weight_decay=WEIGHT_DECAY) + + for epoch in range(5): + print(f"Epoch Head {epoch+1}/5") + train_one_epoch(model, train_loader, optimizer_head, criterion, DEVICE) + validate_one_epoch(model, valid_loader, criterion, DEVICE) + + # --- TAHAP 2: UNFREEZE & FINE-TUNE SELURUH MODEL --- + print("\n--- TAHAP 2: Fine-tuning Seluruh Model ---") + for param in model.parameters(): + param.requires_grad = True + + optimizer_finetune = optim.AdamW(model.parameters(), lr=LEARNING_RATE_FINETUNE, weight_decay=WEIGHT_DECAY) + scheduler = ReduceLROnPlateau(optimizer_finetune, mode='min', factor=0.2, patience=5, verbose=True) + scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer_finetune, 
T_max=EPOCHS, eta_min=1e-7) + + history = {'train_loss': [], 'train_acc': [], 'valid_loss': [], 'valid_acc': []} + best_valid_acc = 0.0 + best_labels, best_preds = None, None + + for epoch in range(EPOCHS): + print(f"Epoch {epoch+1}/{EPOCHS}") + + train_loss, train_acc = train_one_epoch(model, train_loader, optimizer_finetune, criterion, DEVICE) + valid_loss, valid_acc, valid_labels, valid_preds = validate_one_epoch(model, valid_loader, criterion, DEVICE) + + scheduler.step() + + print(f" Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}") + print(f" Valid Loss: {valid_loss:.4f}, Valid Acc: {valid_acc:.4f}") + + history['train_loss'].append(train_loss) + history['train_acc'].append(train_acc) + history['valid_loss'].append(valid_loss) + history['valid_acc'].append(valid_acc) + + if valid_acc > best_valid_acc: + print(f"Validasi akurasi meningkat dari {best_valid_acc:.4f} ke {valid_acc:.4f}. Menyimpan model...") + save_model(epoch, model, optimizer_finetune, criterion, f"{OUTPUT_DIR}/{MODEL_NAME}") + best_valid_acc = valid_acc + best_labels = valid_labels + best_preds = valid_preds + + save_plots(history['train_acc'], history['valid_acc'], history['train_loss'], history['valid_loss'], OUTPUT_DIR) + + if best_labels and best_preds: + save_confusion_matrix(best_labels, best_preds, classes, f"{OUTPUT_DIR}/confusion_matrix.png") + + print("\n--- Selesai ---") + print(f"Model terbaik disimpan di {OUTPUT_DIR}/{MODEL_NAME}") \ No newline at end of file diff --git a/.history/src/train_20250928065223.py b/.history/src/train_20250928065223.py new file mode 100644 index 0000000000000000000000000000000000000000..84a1ab304c5984d2cb3635cbadbd48ad1b2f5bb8 --- /dev/null +++ b/.history/src/train_20250928065223.py @@ -0,0 +1,161 @@ +# src/train.py + +import torch +import torch.nn as nn +import torch.optim as optim +from tqdm import tqdm +import numpy as np +from sklearn.metrics import classification_report +from torch.utils.data import DataLoader, WeightedRandomSampler + +from 
model import create_model +from dataset import get_dataloaders +from utils import save_model, save_plots, save_confusion_matrix, FocalLoss + +# --- 1. KONFIGURASI & HYPERPARAMETERS --- +DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu' +DATA_DIR = 'data' +OUTPUT_DIR = 'outputs/models' +IMAGE_SIZE = 224 +BATCH_SIZE = 16 +NUM_WORKERS = 4 +EPOCHS = 50 +LEARNING_RATE_HEAD = 1e-3 +LEARNING_RATE_FINETUNE = 3e-5 +WEIGHT_DECAY = 0.01 +MODEL_NAME = 'best_swin_model_final.pth' +NUM_CLASSES = 4 + +# --- 2. FUNGSI TRAINING & VALIDASI --- +def train_one_epoch(model, dataloader, optimizer, criterion, device): + model.train() + running_loss = 0.0 + correct_predictions = 0 + total_samples = 0 + + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Training"): + images, labels = images.to(device), labels.to(device) + optimizer.zero_grad() + outputs = model(images) + loss = criterion(outputs, labels) + loss.backward() + optimizer.step() + + running_loss += loss.item() * images.size(0) + _, preds = torch.max(outputs, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + return epoch_loss, epoch_acc + +def validate_one_epoch(model, dataloader, criterion, device): + model.eval() + running_loss = 0.0 + correct_predictions = 0 + total_samples = 0 + all_preds, all_labels = [], [] + + with torch.no_grad(): + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Validating"): + images, labels = images.to(device), labels.to(device) + outputs = model(images) + loss = criterion(outputs, labels) + + running_loss += loss.item() * images.size(0) + _, preds = torch.max(outputs, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + all_preds.extend(preds.cpu().numpy()) + all_labels.extend(labels.cpu().numpy()) + + epoch_loss = running_loss / total_samples + epoch_acc 
= (correct_predictions.double() / total_samples).item() + + print("\n--- Laporan Klasifikasi Validasi ---") + print(classification_report(all_labels, all_preds, target_names=[str(i) for i in range(NUM_CLASSES)], zero_division=0)) + + return epoch_loss, epoch_acc, all_labels, all_preds + +# --- 3. SCRIPT UTAMA --- +if __name__ == '__main__': + _, valid_loader, classes, train_dataset = get_dataloaders(DATA_DIR, BATCH_SIZE, IMAGE_SIZE, NUM_WORKERS) + + print("\n--- Menyiapkan Balanced Sampler untuk Training ---") + class_counts = np.bincount(train_dataset.targets) + class_weights = 1. / torch.tensor(class_counts, dtype=torch.float) + sample_weights = class_weights[train_dataset.targets] + + sampler = WeightedRandomSampler(weights=sample_weights, num_samples=len(sample_weights), replacement=True) + + train_loader = DataLoader( + train_dataset, + batch_size=BATCH_SIZE, + sampler=sampler, + num_workers=NUM_WORKERS + ) + print("Balanced Sampler berhasil dibuat.") + + model = create_model(num_classes=NUM_CLASSES).to(DEVICE) + + criterion = nn.CrossEntropyLoss(label_smoothing=0.1).to(DEVICE) + print("Menggunakan Focal Loss.") + + # --- TAHAP 1: FREEZE BACKBONE, LATIH HEAD --- + print("\n--- TAHAP 1: Melatih Classifier Head ---") + for param in model.parameters(): + param.requires_grad = False + for param in model.head.parameters(): + param.requires_grad = True + + optimizer_head = optim.AdamW(model.head.parameters(), lr=LEARNING_RATE_HEAD, weight_decay=WEIGHT_DECAY) + + for epoch in range(5): + print(f"Epoch Head {epoch+1}/5") + train_one_epoch(model, train_loader, optimizer_head, criterion, DEVICE) + validate_one_epoch(model, valid_loader, criterion, DEVICE) + + # --- TAHAP 2: UNFREEZE & FINE-TUNE SELURUH MODEL --- + print("\n--- TAHAP 2: Fine-tuning Seluruh Model ---") + for param in model.parameters(): + param.requires_grad = True + + optimizer_finetune = optim.AdamW(model.parameters(), lr=LEARNING_RATE_FINETUNE, weight_decay=WEIGHT_DECAY) + scheduler = 
ReduceLROnPlateau(optimizer_finetune, mode='min', factor=0.2, patience=5, verbose=True) + + history = {'train_loss': [], 'train_acc': [], 'valid_loss': [], 'valid_acc': []} + best_valid_acc = 0.0 + best_labels, best_preds = None, None + + for epoch in range(EPOCHS): + print(f"Epoch {epoch+1}/{EPOCHS}") + + train_loss, train_acc = train_one_epoch(model, train_loader, optimizer_finetune, criterion, DEVICE) + valid_loss, valid_acc, valid_labels, valid_preds = validate_one_epoch(model, valid_loader, criterion, DEVICE) + + scheduler.step() + + print(f" Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}") + print(f" Valid Loss: {valid_loss:.4f}, Valid Acc: {valid_acc:.4f}") + + history['train_loss'].append(train_loss) + history['train_acc'].append(train_acc) + history['valid_loss'].append(valid_loss) + history['valid_acc'].append(valid_acc) + + if valid_acc > best_valid_acc: + print(f"Validasi akurasi meningkat dari {best_valid_acc:.4f} ke {valid_acc:.4f}. Menyimpan model...") + save_model(epoch, model, optimizer_finetune, criterion, f"{OUTPUT_DIR}/{MODEL_NAME}") + best_valid_acc = valid_acc + best_labels = valid_labels + best_preds = valid_preds + + save_plots(history['train_acc'], history['valid_acc'], history['train_loss'], history['valid_loss'], OUTPUT_DIR) + + if best_labels and best_preds: + save_confusion_matrix(best_labels, best_preds, classes, f"{OUTPUT_DIR}/confusion_matrix.png") + + print("\n--- Selesai ---") + print(f"Model terbaik disimpan di {OUTPUT_DIR}/{MODEL_NAME}") \ No newline at end of file diff --git a/.history/src/train_20250928065246.py b/.history/src/train_20250928065246.py new file mode 100644 index 0000000000000000000000000000000000000000..0f44abe02fd877da4f5a8cf6db9b76ab72b6f95e --- /dev/null +++ b/.history/src/train_20250928065246.py @@ -0,0 +1,161 @@ +# src/train.py + +import torch +import torch.nn as nn +import torch.optim as optim +from tqdm import tqdm +import numpy as np +from sklearn.metrics import classification_report +from 
torch.utils.data import DataLoader, WeightedRandomSampler + +from model import create_model +from dataset import get_dataloaders +from utils import save_model, save_plots, save_confusion_matrix, FocalLoss + +# --- 1. KONFIGURASI & HYPERPARAMETERS --- +DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu' +DATA_DIR = 'data' +OUTPUT_DIR = 'outputs/models' +IMAGE_SIZE = 224 +BATCH_SIZE = 16 +NUM_WORKERS = 4 +EPOCHS = 50 +LEARNING_RATE_HEAD = 1e-3 +LEARNING_RATE_FINETUNE = 3e-5 +WEIGHT_DECAY = 0.01 +MODEL_NAME = 'best_swin_model_final.pth' +NUM_CLASSES = 4 + +# --- 2. FUNGSI TRAINING & VALIDASI --- +def train_one_epoch(model, dataloader, optimizer, criterion, device): + model.train() + running_loss = 0.0 + correct_predictions = 0 + total_samples = 0 + + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Training"): + images, labels = images.to(device), labels.to(device) + optimizer.zero_grad() + outputs = model(images) + loss = criterion(outputs, labels) + loss.backward() + optimizer.step() + + running_loss += loss.item() * images.size(0) + _, preds = torch.max(outputs, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + return epoch_loss, epoch_acc + +def validate_one_epoch(model, dataloader, criterion, device): + model.eval() + running_loss = 0.0 + correct_predictions = 0 + total_samples = 0 + all_preds, all_labels = [], [] + + with torch.no_grad(): + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Validating"): + images, labels = images.to(device), labels.to(device) + outputs = model(images) + loss = criterion(outputs, labels) + + running_loss += loss.item() * images.size(0) + _, preds = torch.max(outputs, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + all_preds.extend(preds.cpu().numpy()) + 
all_labels.extend(labels.cpu().numpy()) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + + print("\n--- Laporan Klasifikasi Validasi ---") + print(classification_report(all_labels, all_preds, target_names=[str(i) for i in range(NUM_CLASSES)], zero_division=0)) + + return epoch_loss, epoch_acc, all_labels, all_preds + +# --- 3. SCRIPT UTAMA --- +if __name__ == '__main__': + _, valid_loader, classes, train_dataset = get_dataloaders(DATA_DIR, BATCH_SIZE, IMAGE_SIZE, NUM_WORKERS) + + print("\n--- Menyiapkan Balanced Sampler untuk Training ---") + class_counts = np.bincount(train_dataset.targets) + class_weights = 1. / torch.tensor(class_counts, dtype=torch.float) + sample_weights = class_weights[train_dataset.targets] + + sampler = WeightedRandomSampler(weights=sample_weights, num_samples=len(sample_weights), replacement=True) + + train_loader = DataLoader( + train_dataset, + batch_size=BATCH_SIZE, + sampler=sampler, + num_workers=NUM_WORKERS + ) + print("Balanced Sampler berhasil dibuat.") + + model = create_model(num_classes=NUM_CLASSES).to(DEVICE) + + criterion = nn.CrossEntropyLoss(label_smoothing=0.1).to(DEVICE) + print("Menggunakan Focal Loss.") + + # --- TAHAP 1: FREEZE BACKBONE, LATIH HEAD --- + print("\n--- TAHAP 1: Melatih Classifier Head ---") + for param in model.parameters(): + param.requires_grad = False + for param in model.head.parameters(): + param.requires_grad = True + + optimizer_head = optim.AdamW(model.head.parameters(), lr=LEARNING_RATE_HEAD, weight_decay=WEIGHT_DECAY) + + for epoch in range(5): + print(f"Epoch Head {epoch+1}/5") + train_one_epoch(model, train_loader, optimizer_head, criterion, DEVICE) + validate_one_epoch(model, valid_loader, criterion, DEVICE) + + # --- TAHAP 2: UNFREEZE & FINE-TUNE SELURUH MODEL --- + print("\n--- TAHAP 2: Fine-tuning Seluruh Model ---") + for param in model.parameters(): + param.requires_grad = True + + optimizer_finetune = 
optim.AdamW(model.parameters(), lr=LEARNING_RATE_FINETUNE, weight_decay=WEIGHT_DECAY) + scheduler = ReduceLROnPlateau(optimizer_finetune, mode='min', factor=0.2, patience=5, verbose=True) + + history = {'train_loss': [], 'train_acc': [], 'valid_loss': [], 'valid_acc': []} + best_valid_acc = 0.0 + best_labels, best_preds = None, None + + for epoch in range(EPOCHS): + print(f"Epoch {epoch+1}/{EPOCHS}") + + train_loss, train_acc = train_one_epoch(model, train_loader, optimizer_finetune, criterion, DEVICE) + valid_loss, valid_acc, valid_labels, valid_preds = validate_one_epoch(model, valid_loader, criterion, DEVICE) + + scheduler.step(valid_loss) + + print(f" Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}") + print(f" Valid Loss: {valid_loss:.4f}, Valid Acc: {valid_acc:.4f}") + + history['train_loss'].append(train_loss) + history['train_acc'].append(train_acc) + history['valid_loss'].append(valid_loss) + history['valid_acc'].append(valid_acc) + + if valid_acc > best_valid_acc: + print(f"Validasi akurasi meningkat dari {best_valid_acc:.4f} ke {valid_acc:.4f}. 
Menyimpan model...") + save_model(epoch, model, optimizer_finetune, criterion, f"{OUTPUT_DIR}/{MODEL_NAME}") + best_valid_acc = valid_acc + best_labels = valid_labels + best_preds = valid_preds + + save_plots(history['train_acc'], history['valid_acc'], history['train_loss'], history['valid_loss'], OUTPUT_DIR) + + if best_labels and best_preds: + save_confusion_matrix(best_labels, best_preds, classes, f"{OUTPUT_DIR}/confusion_matrix.png") + + print("\n--- Selesai ---") + print(f"Model terbaik disimpan di {OUTPUT_DIR}/{MODEL_NAME}") \ No newline at end of file diff --git a/.history/src/train_20250928065353.py b/.history/src/train_20250928065353.py new file mode 100644 index 0000000000000000000000000000000000000000..b4cb869e0437cc690bd4598c8965ae81152bd810 --- /dev/null +++ b/.history/src/train_20250928065353.py @@ -0,0 +1,161 @@ +# src/train.py + +import torch +import torch.nn as nn +import torch.optim as optim +from tqdm import tqdm +import numpy as np +from sklearn.metrics import classification_report +from torch.utils.data import DataLoader, WeightedRandomSampler + +from model import create_model +from dataset import get_dataloaders +from utils import save_model, save_plots, save_confusion_matrix, FocalLoss + +# --- 1. KONFIGURASI & HYPERPARAMETERS --- +DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu' +DATA_DIR = 'data' +OUTPUT_DIR = 'outputs/models' +IMAGE_SIZE = 224 +BATCH_SIZE = 16 +NUM_WORKERS = 4 +EPOCHS = 50 +LEARNING_RATE_HEAD = 1e-3 +LEARNING_RATE_FINETUNE = 3e-5 +WEIGHT_DECAY = 0.01 +MODEL_NAME = 'best_swin_model_final.pth' +NUM_CLASSES = 4 + +# --- 2. 
FUNGSI TRAINING & VALIDASI --- +def train_one_epoch(model, dataloader, optimizer, criterion, device): + model.train() + running_loss = 0.0 + correct_predictions = 0 + total_samples = 0 + + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Training"): + images, labels = images.to(device), labels.to(device) + optimizer.zero_grad() + outputs = model(images) + loss = criterion(outputs, labels) + loss.backward() + optimizer.step() + + running_loss += loss.item() * images.size(0) + _, preds = torch.max(outputs, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + return epoch_loss, epoch_acc + +def validate_one_epoch(model, dataloader, criterion, device): + model.eval() + running_loss = 0.0 + correct_predictions = 0 + total_samples = 0 + all_preds, all_labels = [], [] + + with torch.no_grad(): + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Validating"): + images, labels = images.to(device), labels.to(device) + outputs = model(images) + loss = criterion(outputs, labels) + + running_loss += loss.item() * images.size(0) + _, preds = torch.max(outputs, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + all_preds.extend(preds.cpu().numpy()) + all_labels.extend(labels.cpu().numpy()) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + + print("\n--- Laporan Klasifikasi Validasi ---") + print(classification_report(all_labels, all_preds, target_names=[str(i) for i in range(NUM_CLASSES)], zero_division=0)) + + return epoch_loss, epoch_acc, all_labels, all_preds + +# --- 3. 
SCRIPT UTAMA --- +if __name__ == '__main__': + _, valid_loader, classes, train_dataset = get_dataloaders(DATA_DIR, BATCH_SIZE, IMAGE_SIZE, NUM_WORKERS) + + print("\n--- Menyiapkan Balanced Sampler untuk Training ---") + class_counts = np.bincount(train_dataset.targets) + class_weights = 1. / torch.tensor(class_counts, dtype=torch.float) + sample_weights = class_weights[train_dataset.targets] + + sampler = WeightedRandomSampler(weights=sample_weights, num_samples=len(sample_weights), replacement=True) + + train_loader = DataLoader( + train_dataset, + batch_size=BATCH_SIZE, + sampler=sampler, + num_workers=NUM_WORKERS + ) + print("Balanced Sampler berhasil dibuat.") + + model = create_model(num_classes=NUM_CLASSES).to(DEVICE) + + criterion = nn.CrossEntropyLoss(label_smoothing=0.1).to(DEVICE) + print("Menggunakan Focal Loss.") + + # --- TAHAP 1: FREEZE BACKBONE, LATIH HEAD --- + print("\n--- TAHAP 1: Melatih Classifier Head ---") + for param in model.parameters(): + param.requires_grad = False + for param in model.head.parameters(): + param.requires_grad = True + + optimizer_head = optim.AdamW(model.head.parameters(), lr=LEARNING_RATE_HEAD, weight_decay=0.5) + + for epoch in range(5): + print(f"Epoch Head {epoch+1}/5") + train_one_epoch(model, train_loader, optimizer_head, criterion, DEVICE) + validate_one_epoch(model, valid_loader, criterion, DEVICE) + + # --- TAHAP 2: UNFREEZE & FINE-TUNE SELURUH MODEL --- + print("\n--- TAHAP 2: Fine-tuning Seluruh Model ---") + for param in model.parameters(): + param.requires_grad = True + + optimizer_finetune = optim.AdamW(model.parameters(), lr=LEARNING_RATE_FINETUNE, weight_decay=WEIGHT_DECAY) + scheduler = ReduceLROnPlateau(optimizer_finetune, mode='min', factor=0.2, patience=5, verbose=True) + + history = {'train_loss': [], 'train_acc': [], 'valid_loss': [], 'valid_acc': []} + best_valid_acc = 0.0 + best_labels, best_preds = None, None + + for epoch in range(EPOCHS): + print(f"Epoch {epoch+1}/{EPOCHS}") + + train_loss, 
train_acc = train_one_epoch(model, train_loader, optimizer_finetune, criterion, DEVICE) + valid_loss, valid_acc, valid_labels, valid_preds = validate_one_epoch(model, valid_loader, criterion, DEVICE) + + scheduler.step(valid_loss) + + print(f" Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}") + print(f" Valid Loss: {valid_loss:.4f}, Valid Acc: {valid_acc:.4f}") + + history['train_loss'].append(train_loss) + history['train_acc'].append(train_acc) + history['valid_loss'].append(valid_loss) + history['valid_acc'].append(valid_acc) + + if valid_acc > best_valid_acc: + print(f"Validasi akurasi meningkat dari {best_valid_acc:.4f} ke {valid_acc:.4f}. Menyimpan model...") + save_model(epoch, model, optimizer_finetune, criterion, f"{OUTPUT_DIR}/{MODEL_NAME}") + best_valid_acc = valid_acc + best_labels = valid_labels + best_preds = valid_preds + + save_plots(history['train_acc'], history['valid_acc'], history['train_loss'], history['valid_loss'], OUTPUT_DIR) + + if best_labels and best_preds: + save_confusion_matrix(best_labels, best_preds, classes, f"{OUTPUT_DIR}/confusion_matrix.png") + + print("\n--- Selesai ---") + print(f"Model terbaik disimpan di {OUTPUT_DIR}/{MODEL_NAME}") \ No newline at end of file diff --git a/.history/src/train_20250928065357.py b/.history/src/train_20250928065357.py new file mode 100644 index 0000000000000000000000000000000000000000..6523c14ef35124065856550155db387972f55659 --- /dev/null +++ b/.history/src/train_20250928065357.py @@ -0,0 +1,161 @@ +# src/train.py + +import torch +import torch.nn as nn +import torch.optim as optim +from tqdm import tqdm +import numpy as np +from sklearn.metrics import classification_report +from torch.utils.data import DataLoader, WeightedRandomSampler + +from model import create_model +from dataset import get_dataloaders +from utils import save_model, save_plots, save_confusion_matrix, FocalLoss + +# --- 1. 
KONFIGURASI & HYPERPARAMETERS --- +DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu' +DATA_DIR = 'data' +OUTPUT_DIR = 'outputs/models' +IMAGE_SIZE = 224 +BATCH_SIZE = 16 +NUM_WORKERS = 4 +EPOCHS = 50 +LEARNING_RATE_HEAD = 1e-3 +LEARNING_RATE_FINETUNE = 3e-5 +WEIGHT_DECAY = 0.01 +MODEL_NAME = 'best_swin_model_final.pth' +NUM_CLASSES = 4 + +# --- 2. FUNGSI TRAINING & VALIDASI --- +def train_one_epoch(model, dataloader, optimizer, criterion, device): + model.train() + running_loss = 0.0 + correct_predictions = 0 + total_samples = 0 + + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Training"): + images, labels = images.to(device), labels.to(device) + optimizer.zero_grad() + outputs = model(images) + loss = criterion(outputs, labels) + loss.backward() + optimizer.step() + + running_loss += loss.item() * images.size(0) + _, preds = torch.max(outputs, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + return epoch_loss, epoch_acc + +def validate_one_epoch(model, dataloader, criterion, device): + model.eval() + running_loss = 0.0 + correct_predictions = 0 + total_samples = 0 + all_preds, all_labels = [], [] + + with torch.no_grad(): + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Validating"): + images, labels = images.to(device), labels.to(device) + outputs = model(images) + loss = criterion(outputs, labels) + + running_loss += loss.item() * images.size(0) + _, preds = torch.max(outputs, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + all_preds.extend(preds.cpu().numpy()) + all_labels.extend(labels.cpu().numpy()) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + + print("\n--- Laporan Klasifikasi Validasi ---") + print(classification_report(all_labels, 
all_preds, target_names=[str(i) for i in range(NUM_CLASSES)], zero_division=0)) + + return epoch_loss, epoch_acc, all_labels, all_preds + +# --- 3. SCRIPT UTAMA --- +if __name__ == '__main__': + _, valid_loader, classes, train_dataset = get_dataloaders(DATA_DIR, BATCH_SIZE, IMAGE_SIZE, NUM_WORKERS) + + print("\n--- Menyiapkan Balanced Sampler untuk Training ---") + class_counts = np.bincount(train_dataset.targets) + class_weights = 1. / torch.tensor(class_counts, dtype=torch.float) + sample_weights = class_weights[train_dataset.targets] + + sampler = WeightedRandomSampler(weights=sample_weights, num_samples=len(sample_weights), replacement=True) + + train_loader = DataLoader( + train_dataset, + batch_size=BATCH_SIZE, + sampler=sampler, + num_workers=NUM_WORKERS + ) + print("Balanced Sampler berhasil dibuat.") + + model = create_model(num_classes=NUM_CLASSES).to(DEVICE) + + criterion = nn.CrossEntropyLoss(label_smoothing=0.1).to(DEVICE) + print("Menggunakan Focal Loss.") + + # --- TAHAP 1: FREEZE BACKBONE, LATIH HEAD --- + print("\n--- TAHAP 1: Melatih Classifier Head ---") + for param in model.parameters(): + param.requires_grad = False + for param in model.head.parameters(): + param.requires_grad = True + + optimizer_head = optim.AdamW(model.head.parameters(), lr=LEARNING_RATE_HEAD, weight_decay=0.05) + + for epoch in range(5): + print(f"Epoch Head {epoch+1}/5") + train_one_epoch(model, train_loader, optimizer_head, criterion, DEVICE) + validate_one_epoch(model, valid_loader, criterion, DEVICE) + + # --- TAHAP 2: UNFREEZE & FINE-TUNE SELURUH MODEL --- + print("\n--- TAHAP 2: Fine-tuning Seluruh Model ---") + for param in model.parameters(): + param.requires_grad = True + + optimizer_finetune = optim.AdamW(model.parameters(), lr=LEARNING_RATE_FINETUNE, weight_decay=WEIGHT_DECAY) + scheduler = ReduceLROnPlateau(optimizer_finetune, mode='min', factor=0.2, patience=5, verbose=True) + + history = {'train_loss': [], 'train_acc': [], 'valid_loss': [], 'valid_acc': []} + 
best_valid_acc = 0.0 + best_labels, best_preds = None, None + + for epoch in range(EPOCHS): + print(f"Epoch {epoch+1}/{EPOCHS}") + + train_loss, train_acc = train_one_epoch(model, train_loader, optimizer_finetune, criterion, DEVICE) + valid_loss, valid_acc, valid_labels, valid_preds = validate_one_epoch(model, valid_loader, criterion, DEVICE) + + scheduler.step(valid_loss) + + print(f" Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}") + print(f" Valid Loss: {valid_loss:.4f}, Valid Acc: {valid_acc:.4f}") + + history['train_loss'].append(train_loss) + history['train_acc'].append(train_acc) + history['valid_loss'].append(valid_loss) + history['valid_acc'].append(valid_acc) + + if valid_acc > best_valid_acc: + print(f"Validasi akurasi meningkat dari {best_valid_acc:.4f} ke {valid_acc:.4f}. Menyimpan model...") + save_model(epoch, model, optimizer_finetune, criterion, f"{OUTPUT_DIR}/{MODEL_NAME}") + best_valid_acc = valid_acc + best_labels = valid_labels + best_preds = valid_preds + + save_plots(history['train_acc'], history['valid_acc'], history['train_loss'], history['valid_loss'], OUTPUT_DIR) + + if best_labels and best_preds: + save_confusion_matrix(best_labels, best_preds, classes, f"{OUTPUT_DIR}/confusion_matrix.png") + + print("\n--- Selesai ---") + print(f"Model terbaik disimpan di {OUTPUT_DIR}/{MODEL_NAME}") \ No newline at end of file diff --git a/.history/src/train_20250928065403.py b/.history/src/train_20250928065403.py new file mode 100644 index 0000000000000000000000000000000000000000..26a0e7aacc76c496c67dff08965e39f9d28bbd8b --- /dev/null +++ b/.history/src/train_20250928065403.py @@ -0,0 +1,161 @@ +# src/train.py + +import torch +import torch.nn as nn +import torch.optim as optim +from tqdm import tqdm +import numpy as np +from sklearn.metrics import classification_report +from torch.utils.data import DataLoader, WeightedRandomSampler + +from model import create_model +from dataset import get_dataloaders +from utils import save_model, 
save_plots, save_confusion_matrix, FocalLoss + +# --- 1. KONFIGURASI & HYPERPARAMETERS --- +DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu' +DATA_DIR = 'data' +OUTPUT_DIR = 'outputs/models' +IMAGE_SIZE = 224 +BATCH_SIZE = 16 +NUM_WORKERS = 4 +EPOCHS = 50 +LEARNING_RATE_HEAD = 1e-3 +LEARNING_RATE_FINETUNE = 3e-5 +WEIGHT_DECAY = 0.01 +MODEL_NAME = 'best_swin_model_final.pth' +NUM_CLASSES = 4 + +# --- 2. FUNGSI TRAINING & VALIDASI --- +def train_one_epoch(model, dataloader, optimizer, criterion, device): + model.train() + running_loss = 0.0 + correct_predictions = 0 + total_samples = 0 + + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Training"): + images, labels = images.to(device), labels.to(device) + optimizer.zero_grad() + outputs = model(images) + loss = criterion(outputs, labels) + loss.backward() + optimizer.step() + + running_loss += loss.item() * images.size(0) + _, preds = torch.max(outputs, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + return epoch_loss, epoch_acc + +def validate_one_epoch(model, dataloader, criterion, device): + model.eval() + running_loss = 0.0 + correct_predictions = 0 + total_samples = 0 + all_preds, all_labels = [], [] + + with torch.no_grad(): + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Validating"): + images, labels = images.to(device), labels.to(device) + outputs = model(images) + loss = criterion(outputs, labels) + + running_loss += loss.item() * images.size(0) + _, preds = torch.max(outputs, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + all_preds.extend(preds.cpu().numpy()) + all_labels.extend(labels.cpu().numpy()) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + + print("\n--- Laporan Klasifikasi 
Validasi ---") + print(classification_report(all_labels, all_preds, target_names=[str(i) for i in range(NUM_CLASSES)], zero_division=0)) + + return epoch_loss, epoch_acc, all_labels, all_preds + +# --- 3. SCRIPT UTAMA --- +if __name__ == '__main__': + _, valid_loader, classes, train_dataset = get_dataloaders(DATA_DIR, BATCH_SIZE, IMAGE_SIZE, NUM_WORKERS) + + print("\n--- Menyiapkan Balanced Sampler untuk Training ---") + class_counts = np.bincount(train_dataset.targets) + class_weights = 1. / torch.tensor(class_counts, dtype=torch.float) + sample_weights = class_weights[train_dataset.targets] + + sampler = WeightedRandomSampler(weights=sample_weights, num_samples=len(sample_weights), replacement=True) + + train_loader = DataLoader( + train_dataset, + batch_size=BATCH_SIZE, + sampler=sampler, + num_workers=NUM_WORKERS + ) + print("Balanced Sampler berhasil dibuat.") + + model = create_model(num_classes=NUM_CLASSES).to(DEVICE) + + criterion = nn.CrossEntropyLoss(label_smoothing=0.1).to(DEVICE) + print("Menggunakan Focal Loss.") + + # --- TAHAP 1: FREEZE BACKBONE, LATIH HEAD --- + print("\n--- TAHAP 1: Melatih Classifier Head ---") + for param in model.parameters(): + param.requires_grad = False + for param in model.head.parameters(): + param.requires_grad = True + + optimizer_head = optim.AdamW(model.head.parameters(), lr=LEARNING_RATE_HEAD, weight_decay=0.05) + + for epoch in range(5): + print(f"Epoch Head {epoch+1}/5") + train_one_epoch(model, train_loader, optimizer_head, criterion, DEVICE) + validate_one_epoch(model, valid_loader, criterion, DEVICE) + + # --- TAHAP 2: UNFREEZE & FINE-TUNE SELURUH MODEL --- + print("\n--- TAHAP 2: Fine-tuning Seluruh Model ---") + for param in model.parameters(): + param.requires_grad = True + + optimizer_finetune = optim.AdamW(model.parameters(), lr=LEARNING_RATE_FINETUNE, weight_decay=0.05) + scheduler = ReduceLROnPlateau(optimizer_finetune, mode='min', factor=0.2, patience=5, verbose=True) + + history = {'train_loss': [], 
'train_acc': [], 'valid_loss': [], 'valid_acc': []} + best_valid_acc = 0.0 + best_labels, best_preds = None, None + + for epoch in range(EPOCHS): + print(f"Epoch {epoch+1}/{EPOCHS}") + + train_loss, train_acc = train_one_epoch(model, train_loader, optimizer_finetune, criterion, DEVICE) + valid_loss, valid_acc, valid_labels, valid_preds = validate_one_epoch(model, valid_loader, criterion, DEVICE) + + scheduler.step(valid_loss) + + print(f" Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}") + print(f" Valid Loss: {valid_loss:.4f}, Valid Acc: {valid_acc:.4f}") + + history['train_loss'].append(train_loss) + history['train_acc'].append(train_acc) + history['valid_loss'].append(valid_loss) + history['valid_acc'].append(valid_acc) + + if valid_acc > best_valid_acc: + print(f"Validasi akurasi meningkat dari {best_valid_acc:.4f} ke {valid_acc:.4f}. Menyimpan model...") + save_model(epoch, model, optimizer_finetune, criterion, f"{OUTPUT_DIR}/{MODEL_NAME}") + best_valid_acc = valid_acc + best_labels = valid_labels + best_preds = valid_preds + + save_plots(history['train_acc'], history['valid_acc'], history['train_loss'], history['valid_loss'], OUTPUT_DIR) + + if best_labels and best_preds: + save_confusion_matrix(best_labels, best_preds, classes, f"{OUTPUT_DIR}/confusion_matrix.png") + + print("\n--- Selesai ---") + print(f"Model terbaik disimpan di {OUTPUT_DIR}/{MODEL_NAME}") \ No newline at end of file diff --git a/.history/src/train_20250928065437.py b/.history/src/train_20250928065437.py new file mode 100644 index 0000000000000000000000000000000000000000..afb53551915f096bd58958db418715b9166040e0 --- /dev/null +++ b/.history/src/train_20250928065437.py @@ -0,0 +1,162 @@ +# src/train.py + +import torch +import torch.nn as nn +import torch.optim as optim +from tqdm import tqdm +import numpy as np +from sklearn.metrics import classification_report +from torch.utils.data import DataLoader, WeightedRandomSampler + + +from model import create_model +from dataset import 
get_dataloaders +from utils import save_model, save_plots, save_confusion_matrix, FocalLoss + +# --- 1. KONFIGURASI & HYPERPARAMETERS --- +DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu' +DATA_DIR = 'data' +OUTPUT_DIR = 'outputs/models' +IMAGE_SIZE = 224 +BATCH_SIZE = 16 +NUM_WORKERS = 4 +EPOCHS = 50 +LEARNING_RATE_HEAD = 1e-3 +LEARNING_RATE_FINETUNE = 3e-5 +WEIGHT_DECAY = 0.01 +MODEL_NAME = 'best_swin_model_final.pth' +NUM_CLASSES = 4 + +# --- 2. FUNGSI TRAINING & VALIDASI --- +def train_one_epoch(model, dataloader, optimizer, criterion, device): + model.train() + running_loss = 0.0 + correct_predictions = 0 + total_samples = 0 + + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Training"): + images, labels = images.to(device), labels.to(device) + optimizer.zero_grad() + outputs = model(images) + loss = criterion(outputs, labels) + loss.backward() + optimizer.step() + + running_loss += loss.item() * images.size(0) + _, preds = torch.max(outputs, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + return epoch_loss, epoch_acc + +def validate_one_epoch(model, dataloader, criterion, device): + model.eval() + running_loss = 0.0 + correct_predictions = 0 + total_samples = 0 + all_preds, all_labels = [], [] + + with torch.no_grad(): + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Validating"): + images, labels = images.to(device), labels.to(device) + outputs = model(images) + loss = criterion(outputs, labels) + + running_loss += loss.item() * images.size(0) + _, preds = torch.max(outputs, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + all_preds.extend(preds.cpu().numpy()) + all_labels.extend(labels.cpu().numpy()) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / 
total_samples).item() + + print("\n--- Laporan Klasifikasi Validasi ---") + print(classification_report(all_labels, all_preds, target_names=[str(i) for i in range(NUM_CLASSES)], zero_division=0)) + + return epoch_loss, epoch_acc, all_labels, all_preds + +# --- 3. SCRIPT UTAMA --- +if __name__ == '__main__': + _, valid_loader, classes, train_dataset = get_dataloaders(DATA_DIR, BATCH_SIZE, IMAGE_SIZE, NUM_WORKERS) + + print("\n--- Menyiapkan Balanced Sampler untuk Training ---") + class_counts = np.bincount(train_dataset.targets) + class_weights = 1. / torch.tensor(class_counts, dtype=torch.float) + sample_weights = class_weights[train_dataset.targets] + + sampler = WeightedRandomSampler(weights=sample_weights, num_samples=len(sample_weights), replacement=True) + + train_loader = DataLoader( + train_dataset, + batch_size=BATCH_SIZE, + sampler=sampler, + num_workers=NUM_WORKERS + ) + print("Balanced Sampler berhasil dibuat.") + + model = create_model(num_classes=NUM_CLASSES).to(DEVICE) + + criterion = nn.CrossEntropyLoss(label_smoothing=0.1).to(DEVICE) + print("Menggunakan Focal Loss.") + + # --- TAHAP 1: FREEZE BACKBONE, LATIH HEAD --- + print("\n--- TAHAP 1: Melatih Classifier Head ---") + for param in model.parameters(): + param.requires_grad = False + for param in model.head.parameters(): + param.requires_grad = True + + optimizer_head = optim.AdamW(model.head.parameters(), lr=LEARNING_RATE_HEAD, weight_decay=0.05) + + for epoch in range(5): + print(f"Epoch Head {epoch+1}/5") + train_one_epoch(model, train_loader, optimizer_head, criterion, DEVICE) + validate_one_epoch(model, valid_loader, criterion, DEVICE) + + # --- TAHAP 2: UNFREEZE & FINE-TUNE SELURUH MODEL --- + print("\n--- TAHAP 2: Fine-tuning Seluruh Model ---") + for param in model.parameters(): + param.requires_grad = True + + optimizer_finetune = optim.AdamW(model.parameters(), lr=LEARNING_RATE_FINETUNE, weight_decay=0.05) + scheduler = ReduceLROnPlateau(optimizer_finetune, mode='min', factor=0.2, 
patience=5, verbose=True) + + history = {'train_loss': [], 'train_acc': [], 'valid_loss': [], 'valid_acc': []} + best_valid_acc = 0.0 + best_labels, best_preds = None, None + + for epoch in range(EPOCHS): + print(f"Epoch {epoch+1}/{EPOCHS}") + + train_loss, train_acc = train_one_epoch(model, train_loader, optimizer_finetune, criterion, DEVICE) + valid_loss, valid_acc, valid_labels, valid_preds = validate_one_epoch(model, valid_loader, criterion, DEVICE) + + scheduler.step(valid_loss) + + print(f" Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}") + print(f" Valid Loss: {valid_loss:.4f}, Valid Acc: {valid_acc:.4f}") + + history['train_loss'].append(train_loss) + history['train_acc'].append(train_acc) + history['valid_loss'].append(valid_loss) + history['valid_acc'].append(valid_acc) + + if valid_acc > best_valid_acc: + print(f"Validasi akurasi meningkat dari {best_valid_acc:.4f} ke {valid_acc:.4f}. Menyimpan model...") + save_model(epoch, model, optimizer_finetune, criterion, f"{OUTPUT_DIR}/{MODEL_NAME}") + best_valid_acc = valid_acc + best_labels = valid_labels + best_preds = valid_preds + + save_plots(history['train_acc'], history['valid_acc'], history['train_loss'], history['valid_loss'], OUTPUT_DIR) + + if best_labels and best_preds: + save_confusion_matrix(best_labels, best_preds, classes, f"{OUTPUT_DIR}/confusion_matrix.png") + + print("\n--- Selesai ---") + print(f"Model terbaik disimpan di {OUTPUT_DIR}/{MODEL_NAME}") \ No newline at end of file diff --git a/.history/src/train_20250928065442.py b/.history/src/train_20250928065442.py new file mode 100644 index 0000000000000000000000000000000000000000..3312ca6fd80fcbdafb80dd6931f9c792da8c1029 --- /dev/null +++ b/.history/src/train_20250928065442.py @@ -0,0 +1,162 @@ +# src/train.py + +import torch +import torch.nn as nn +import torch.optim as optim +from tqdm import tqdm +import numpy as np +from sklearn.metrics import classification_report +from torch.utils.data import DataLoader, WeightedRandomSampler 
+ + +from model import create_model +from dataset import get_dataloaders +from utils import save_model, save_plots, save_confusion_matrix, FocalLoss +from torch.optim.lr_scheduler import ReduceLROnPlateau +# --- 1. KONFIGURASI & HYPERPARAMETERS --- +DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu' +DATA_DIR = 'data' +OUTPUT_DIR = 'outputs/models' +IMAGE_SIZE = 224 +BATCH_SIZE = 16 +NUM_WORKERS = 4 +EPOCHS = 50 +LEARNING_RATE_HEAD = 1e-3 +LEARNING_RATE_FINETUNE = 3e-5 +WEIGHT_DECAY = 0.01 +MODEL_NAME = 'best_swin_model_final.pth' +NUM_CLASSES = 4 + +# --- 2. FUNGSI TRAINING & VALIDASI --- +def train_one_epoch(model, dataloader, optimizer, criterion, device): + model.train() + running_loss = 0.0 + correct_predictions = 0 + total_samples = 0 + + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Training"): + images, labels = images.to(device), labels.to(device) + optimizer.zero_grad() + outputs = model(images) + loss = criterion(outputs, labels) + loss.backward() + optimizer.step() + + running_loss += loss.item() * images.size(0) + _, preds = torch.max(outputs, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + return epoch_loss, epoch_acc + +def validate_one_epoch(model, dataloader, criterion, device): + model.eval() + running_loss = 0.0 + correct_predictions = 0 + total_samples = 0 + all_preds, all_labels = [], [] + + with torch.no_grad(): + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Validating"): + images, labels = images.to(device), labels.to(device) + outputs = model(images) + loss = criterion(outputs, labels) + + running_loss += loss.item() * images.size(0) + _, preds = torch.max(outputs, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + all_preds.extend(preds.cpu().numpy()) + 
all_labels.extend(labels.cpu().numpy()) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + + print("\n--- Laporan Klasifikasi Validasi ---") + print(classification_report(all_labels, all_preds, target_names=[str(i) for i in range(NUM_CLASSES)], zero_division=0)) + + return epoch_loss, epoch_acc, all_labels, all_preds + +# --- 3. SCRIPT UTAMA --- +if __name__ == '__main__': + _, valid_loader, classes, train_dataset = get_dataloaders(DATA_DIR, BATCH_SIZE, IMAGE_SIZE, NUM_WORKERS) + + print("\n--- Menyiapkan Balanced Sampler untuk Training ---") + class_counts = np.bincount(train_dataset.targets) + class_weights = 1. / torch.tensor(class_counts, dtype=torch.float) + sample_weights = class_weights[train_dataset.targets] + + sampler = WeightedRandomSampler(weights=sample_weights, num_samples=len(sample_weights), replacement=True) + + train_loader = DataLoader( + train_dataset, + batch_size=BATCH_SIZE, + sampler=sampler, + num_workers=NUM_WORKERS + ) + print("Balanced Sampler berhasil dibuat.") + + model = create_model(num_classes=NUM_CLASSES).to(DEVICE) + + criterion = nn.CrossEntropyLoss(label_smoothing=0.1).to(DEVICE) + print("Menggunakan Focal Loss.") + + # --- TAHAP 1: FREEZE BACKBONE, LATIH HEAD --- + print("\n--- TAHAP 1: Melatih Classifier Head ---") + for param in model.parameters(): + param.requires_grad = False + for param in model.head.parameters(): + param.requires_grad = True + + optimizer_head = optim.AdamW(model.head.parameters(), lr=LEARNING_RATE_HEAD, weight_decay=0.05) + + for epoch in range(5): + print(f"Epoch Head {epoch+1}/5") + train_one_epoch(model, train_loader, optimizer_head, criterion, DEVICE) + validate_one_epoch(model, valid_loader, criterion, DEVICE) + + # --- TAHAP 2: UNFREEZE & FINE-TUNE SELURUH MODEL --- + print("\n--- TAHAP 2: Fine-tuning Seluruh Model ---") + for param in model.parameters(): + param.requires_grad = True + + optimizer_finetune = 
optim.AdamW(model.parameters(), lr=LEARNING_RATE_FINETUNE, weight_decay=0.05) + scheduler = ReduceLROnPlateau(optimizer_finetune, mode='min', factor=0.2, patience=5, verbose=True) + + history = {'train_loss': [], 'train_acc': [], 'valid_loss': [], 'valid_acc': []} + best_valid_acc = 0.0 + best_labels, best_preds = None, None + + for epoch in range(EPOCHS): + print(f"Epoch {epoch+1}/{EPOCHS}") + + train_loss, train_acc = train_one_epoch(model, train_loader, optimizer_finetune, criterion, DEVICE) + valid_loss, valid_acc, valid_labels, valid_preds = validate_one_epoch(model, valid_loader, criterion, DEVICE) + + scheduler.step(valid_loss) + + print(f" Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}") + print(f" Valid Loss: {valid_loss:.4f}, Valid Acc: {valid_acc:.4f}") + + history['train_loss'].append(train_loss) + history['train_acc'].append(train_acc) + history['valid_loss'].append(valid_loss) + history['valid_acc'].append(valid_acc) + + if valid_acc > best_valid_acc: + print(f"Validasi akurasi meningkat dari {best_valid_acc:.4f} ke {valid_acc:.4f}. 
Menyimpan model...") + save_model(epoch, model, optimizer_finetune, criterion, f"{OUTPUT_DIR}/{MODEL_NAME}") + best_valid_acc = valid_acc + best_labels = valid_labels + best_preds = valid_preds + + save_plots(history['train_acc'], history['valid_acc'], history['train_loss'], history['valid_loss'], OUTPUT_DIR) + + if best_labels and best_preds: + save_confusion_matrix(best_labels, best_preds, classes, f"{OUTPUT_DIR}/confusion_matrix.png") + + print("\n--- Selesai ---") + print(f"Model terbaik disimpan di {OUTPUT_DIR}/{MODEL_NAME}") \ No newline at end of file diff --git a/.history/src/train_20250928065444.py b/.history/src/train_20250928065444.py new file mode 100644 index 0000000000000000000000000000000000000000..164183cae6be723eaa3825514c7afdd7425773be --- /dev/null +++ b/.history/src/train_20250928065444.py @@ -0,0 +1,163 @@ +# src/train.py + +import torch +import torch.nn as nn +import torch.optim as optim +from tqdm import tqdm +import numpy as np +from sklearn.metrics import classification_report +from torch.utils.data import DataLoader, WeightedRandomSampler + + +from model import create_model +from dataset import get_dataloaders +from utils import save_model, save_plots, save_confusion_matrix, FocalLoss +from torch.optim.lr_scheduler import ReduceLROnPlateau + +# --- 1. KONFIGURASI & HYPERPARAMETERS --- +DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu' +DATA_DIR = 'data' +OUTPUT_DIR = 'outputs/models' +IMAGE_SIZE = 224 +BATCH_SIZE = 16 +NUM_WORKERS = 4 +EPOCHS = 50 +LEARNING_RATE_HEAD = 1e-3 +LEARNING_RATE_FINETUNE = 3e-5 +WEIGHT_DECAY = 0.01 +MODEL_NAME = 'best_swin_model_final.pth' +NUM_CLASSES = 4 + +# --- 2. 
FUNGSI TRAINING & VALIDASI --- +def train_one_epoch(model, dataloader, optimizer, criterion, device): + model.train() + running_loss = 0.0 + correct_predictions = 0 + total_samples = 0 + + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Training"): + images, labels = images.to(device), labels.to(device) + optimizer.zero_grad() + outputs = model(images) + loss = criterion(outputs, labels) + loss.backward() + optimizer.step() + + running_loss += loss.item() * images.size(0) + _, preds = torch.max(outputs, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + return epoch_loss, epoch_acc + +def validate_one_epoch(model, dataloader, criterion, device): + model.eval() + running_loss = 0.0 + correct_predictions = 0 + total_samples = 0 + all_preds, all_labels = [], [] + + with torch.no_grad(): + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Validating"): + images, labels = images.to(device), labels.to(device) + outputs = model(images) + loss = criterion(outputs, labels) + + running_loss += loss.item() * images.size(0) + _, preds = torch.max(outputs, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + all_preds.extend(preds.cpu().numpy()) + all_labels.extend(labels.cpu().numpy()) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + + print("\n--- Laporan Klasifikasi Validasi ---") + print(classification_report(all_labels, all_preds, target_names=[str(i) for i in range(NUM_CLASSES)], zero_division=0)) + + return epoch_loss, epoch_acc, all_labels, all_preds + +# --- 3. 
SCRIPT UTAMA --- +if __name__ == '__main__': + _, valid_loader, classes, train_dataset = get_dataloaders(DATA_DIR, BATCH_SIZE, IMAGE_SIZE, NUM_WORKERS) + + print("\n--- Menyiapkan Balanced Sampler untuk Training ---") + class_counts = np.bincount(train_dataset.targets) + class_weights = 1. / torch.tensor(class_counts, dtype=torch.float) + sample_weights = class_weights[train_dataset.targets] + + sampler = WeightedRandomSampler(weights=sample_weights, num_samples=len(sample_weights), replacement=True) + + train_loader = DataLoader( + train_dataset, + batch_size=BATCH_SIZE, + sampler=sampler, + num_workers=NUM_WORKERS + ) + print("Balanced Sampler berhasil dibuat.") + + model = create_model(num_classes=NUM_CLASSES).to(DEVICE) + + criterion = nn.CrossEntropyLoss(label_smoothing=0.1).to(DEVICE) + print("Menggunakan Focal Loss.") + + # --- TAHAP 1: FREEZE BACKBONE, LATIH HEAD --- + print("\n--- TAHAP 1: Melatih Classifier Head ---") + for param in model.parameters(): + param.requires_grad = False + for param in model.head.parameters(): + param.requires_grad = True + + optimizer_head = optim.AdamW(model.head.parameters(), lr=LEARNING_RATE_HEAD, weight_decay=0.05) + + for epoch in range(5): + print(f"Epoch Head {epoch+1}/5") + train_one_epoch(model, train_loader, optimizer_head, criterion, DEVICE) + validate_one_epoch(model, valid_loader, criterion, DEVICE) + + # --- TAHAP 2: UNFREEZE & FINE-TUNE SELURUH MODEL --- + print("\n--- TAHAP 2: Fine-tuning Seluruh Model ---") + for param in model.parameters(): + param.requires_grad = True + + optimizer_finetune = optim.AdamW(model.parameters(), lr=LEARNING_RATE_FINETUNE, weight_decay=0.05) + scheduler = ReduceLROnPlateau(optimizer_finetune, mode='min', factor=0.2, patience=5, verbose=True) + + history = {'train_loss': [], 'train_acc': [], 'valid_loss': [], 'valid_acc': []} + best_valid_acc = 0.0 + best_labels, best_preds = None, None + + for epoch in range(EPOCHS): + print(f"Epoch {epoch+1}/{EPOCHS}") + + train_loss, train_acc = 
train_one_epoch(model, train_loader, optimizer_finetune, criterion, DEVICE) + valid_loss, valid_acc, valid_labels, valid_preds = validate_one_epoch(model, valid_loader, criterion, DEVICE) + + scheduler.step(valid_loss) + + print(f" Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}") + print(f" Valid Loss: {valid_loss:.4f}, Valid Acc: {valid_acc:.4f}") + + history['train_loss'].append(train_loss) + history['train_acc'].append(train_acc) + history['valid_loss'].append(valid_loss) + history['valid_acc'].append(valid_acc) + + if valid_acc > best_valid_acc: + print(f"Validasi akurasi meningkat dari {best_valid_acc:.4f} ke {valid_acc:.4f}. Menyimpan model...") + save_model(epoch, model, optimizer_finetune, criterion, f"{OUTPUT_DIR}/{MODEL_NAME}") + best_valid_acc = valid_acc + best_labels = valid_labels + best_preds = valid_preds + + save_plots(history['train_acc'], history['valid_acc'], history['train_loss'], history['valid_loss'], OUTPUT_DIR) + + if best_labels and best_preds: + save_confusion_matrix(best_labels, best_preds, classes, f"{OUTPUT_DIR}/confusion_matrix.png") + + print("\n--- Selesai ---") + print(f"Model terbaik disimpan di {OUTPUT_DIR}/{MODEL_NAME}") \ No newline at end of file diff --git a/.history/src/train_20250928065553.py b/.history/src/train_20250928065553.py new file mode 100644 index 0000000000000000000000000000000000000000..0a7713d7840d05738942e0cd943227749094a87c --- /dev/null +++ b/.history/src/train_20250928065553.py @@ -0,0 +1,163 @@ +# src/train.py + +import torch +import torch.nn as nn +import torch.optim as optim +from tqdm import tqdm +import numpy as np +from sklearn.metrics import classification_report +from torch.utils.data import DataLoader, WeightedRandomSampler + + +from model import create_model +from dataset import get_dataloaders +from utils import save_model, save_plots, save_confusion_matrix, FocalLoss +from torch.optim.lr_scheduler import ReduceLROnPlateau + +# --- 1. 
KONFIGURASI & HYPERPARAMETERS --- +DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu' +DATA_DIR = 'data' +OUTPUT_DIR = 'outputs/models' +IMAGE_SIZE = 224 +BATCH_SIZE = 8 +NUM_WORKERS = 4 +EPOCHS = 50 +LEARNING_RATE_HEAD = 1e-3 +LEARNING_RATE_FINETUNE = 3e-5 +WEIGHT_DECAY = 0.01 +MODEL_NAME = 'best_swin_model_final.pth' +NUM_CLASSES = 4 + +# --- 2. FUNGSI TRAINING & VALIDASI --- +def train_one_epoch(model, dataloader, optimizer, criterion, device): + model.train() + running_loss = 0.0 + correct_predictions = 0 + total_samples = 0 + + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Training"): + images, labels = images.to(device), labels.to(device) + optimizer.zero_grad() + outputs = model(images) + loss = criterion(outputs, labels) + loss.backward() + optimizer.step() + + running_loss += loss.item() * images.size(0) + _, preds = torch.max(outputs, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + return epoch_loss, epoch_acc + +def validate_one_epoch(model, dataloader, criterion, device): + model.eval() + running_loss = 0.0 + correct_predictions = 0 + total_samples = 0 + all_preds, all_labels = [], [] + + with torch.no_grad(): + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Validating"): + images, labels = images.to(device), labels.to(device) + outputs = model(images) + loss = criterion(outputs, labels) + + running_loss += loss.item() * images.size(0) + _, preds = torch.max(outputs, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + all_preds.extend(preds.cpu().numpy()) + all_labels.extend(labels.cpu().numpy()) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + + print("\n--- Laporan Klasifikasi Validasi ---") + print(classification_report(all_labels, 
all_preds, target_names=[str(i) for i in range(NUM_CLASSES)], zero_division=0)) + + return epoch_loss, epoch_acc, all_labels, all_preds + +# --- 3. SCRIPT UTAMA --- +if __name__ == '__main__': + _, valid_loader, classes, train_dataset = get_dataloaders(DATA_DIR, BATCH_SIZE, IMAGE_SIZE, NUM_WORKERS) + + print("\n--- Menyiapkan Balanced Sampler untuk Training ---") + class_counts = np.bincount(train_dataset.targets) + class_weights = 1. / torch.tensor(class_counts, dtype=torch.float) + sample_weights = class_weights[train_dataset.targets] + + sampler = WeightedRandomSampler(weights=sample_weights, num_samples=len(sample_weights), replacement=True) + + train_loader = DataLoader( + train_dataset, + batch_size=BATCH_SIZE, + sampler=sampler, + num_workers=NUM_WORKERS + ) + print("Balanced Sampler berhasil dibuat.") + + model = create_model(num_classes=NUM_CLASSES).to(DEVICE) + + criterion = nn.CrossEntropyLoss(label_smoothing=0.1).to(DEVICE) + print("Menggunakan Focal Loss.") + + # --- TAHAP 1: FREEZE BACKBONE, LATIH HEAD --- + print("\n--- TAHAP 1: Melatih Classifier Head ---") + for param in model.parameters(): + param.requires_grad = False + for param in model.head.parameters(): + param.requires_grad = True + + optimizer_head = optim.AdamW(model.head.parameters(), lr=LEARNING_RATE_HEAD, weight_decay=0.05) + + for epoch in range(5): + print(f"Epoch Head {epoch+1}/5") + train_one_epoch(model, train_loader, optimizer_head, criterion, DEVICE) + validate_one_epoch(model, valid_loader, criterion, DEVICE) + + # --- TAHAP 2: UNFREEZE & FINE-TUNE SELURUH MODEL --- + print("\n--- TAHAP 2: Fine-tuning Seluruh Model ---") + for param in model.parameters(): + param.requires_grad = True + + optimizer_finetune = optim.AdamW(model.parameters(), lr=LEARNING_RATE_FINETUNE, weight_decay=0.05) + scheduler = ReduceLROnPlateau(optimizer_finetune, mode='min', factor=0.2, patience=5, verbose=True) + + history = {'train_loss': [], 'train_acc': [], 'valid_loss': [], 'valid_acc': []} + 
best_valid_acc = 0.0 + best_labels, best_preds = None, None + + for epoch in range(EPOCHS): + print(f"Epoch {epoch+1}/{EPOCHS}") + + train_loss, train_acc = train_one_epoch(model, train_loader, optimizer_finetune, criterion, DEVICE) + valid_loss, valid_acc, valid_labels, valid_preds = validate_one_epoch(model, valid_loader, criterion, DEVICE) + + scheduler.step(valid_loss) + + print(f" Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}") + print(f" Valid Loss: {valid_loss:.4f}, Valid Acc: {valid_acc:.4f}") + + history['train_loss'].append(train_loss) + history['train_acc'].append(train_acc) + history['valid_loss'].append(valid_loss) + history['valid_acc'].append(valid_acc) + + if valid_acc > best_valid_acc: + print(f"Validasi akurasi meningkat dari {best_valid_acc:.4f} ke {valid_acc:.4f}. Menyimpan model...") + save_model(epoch, model, optimizer_finetune, criterion, f"{OUTPUT_DIR}/{MODEL_NAME}") + best_valid_acc = valid_acc + best_labels = valid_labels + best_preds = valid_preds + + save_plots(history['train_acc'], history['valid_acc'], history['train_loss'], history['valid_loss'], OUTPUT_DIR) + + if best_labels and best_preds: + save_confusion_matrix(best_labels, best_preds, classes, f"{OUTPUT_DIR}/confusion_matrix.png") + + print("\n--- Selesai ---") + print(f"Model terbaik disimpan di {OUTPUT_DIR}/{MODEL_NAME}") \ No newline at end of file diff --git a/.history/src/train_20250928065604.py b/.history/src/train_20250928065604.py new file mode 100644 index 0000000000000000000000000000000000000000..54f274c4ea12dd77b3b99ef35e21cdb18eb8daf7 --- /dev/null +++ b/.history/src/train_20250928065604.py @@ -0,0 +1,163 @@ +# src/train.py + +import torch +import torch.nn as nn +import torch.optim as optim +from tqdm import tqdm +import numpy as np +from sklearn.metrics import classification_report +from torch.utils.data import DataLoader, WeightedRandomSampler + + +from model import create_model +from dataset import get_dataloaders +from utils import save_model, 
save_plots, save_confusion_matrix, FocalLoss +from torch.optim.lr_scheduler import ReduceLROnPlateau + +# --- 1. KONFIGURASI & HYPERPARAMETERS --- +DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu' +DATA_DIR = 'data' +OUTPUT_DIR = 'outputs/models' +IMAGE_SIZE = 224 +BATCH_SIZE = 8 +NUM_WORKERS = 4 +EPOCHS = 50 +LEARNING_RATE_HEAD = 1e-3 +LEARNING_RATE_FINETUNE = 3e-5 +WEIGHT_DECAY = 0.0 +MODEL_NAME = 'best_swin_model_final.pth' +NUM_CLASSES = 4 + +# --- 2. FUNGSI TRAINING & VALIDASI --- +def train_one_epoch(model, dataloader, optimizer, criterion, device): + model.train() + running_loss = 0.0 + correct_predictions = 0 + total_samples = 0 + + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Training"): + images, labels = images.to(device), labels.to(device) + optimizer.zero_grad() + outputs = model(images) + loss = criterion(outputs, labels) + loss.backward() + optimizer.step() + + running_loss += loss.item() * images.size(0) + _, preds = torch.max(outputs, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + return epoch_loss, epoch_acc + +def validate_one_epoch(model, dataloader, criterion, device): + model.eval() + running_loss = 0.0 + correct_predictions = 0 + total_samples = 0 + all_preds, all_labels = [], [] + + with torch.no_grad(): + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Validating"): + images, labels = images.to(device), labels.to(device) + outputs = model(images) + loss = criterion(outputs, labels) + + running_loss += loss.item() * images.size(0) + _, preds = torch.max(outputs, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + all_preds.extend(preds.cpu().numpy()) + all_labels.extend(labels.cpu().numpy()) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / 
total_samples).item() + + print("\n--- Laporan Klasifikasi Validasi ---") + print(classification_report(all_labels, all_preds, target_names=[str(i) for i in range(NUM_CLASSES)], zero_division=0)) + + return epoch_loss, epoch_acc, all_labels, all_preds + +# --- 3. SCRIPT UTAMA --- +if __name__ == '__main__': + _, valid_loader, classes, train_dataset = get_dataloaders(DATA_DIR, BATCH_SIZE, IMAGE_SIZE, NUM_WORKERS) + + print("\n--- Menyiapkan Balanced Sampler untuk Training ---") + class_counts = np.bincount(train_dataset.targets) + class_weights = 1. / torch.tensor(class_counts, dtype=torch.float) + sample_weights = class_weights[train_dataset.targets] + + sampler = WeightedRandomSampler(weights=sample_weights, num_samples=len(sample_weights), replacement=True) + + train_loader = DataLoader( + train_dataset, + batch_size=BATCH_SIZE, + sampler=sampler, + num_workers=NUM_WORKERS + ) + print("Balanced Sampler berhasil dibuat.") + + model = create_model(num_classes=NUM_CLASSES).to(DEVICE) + + criterion = nn.CrossEntropyLoss(label_smoothing=0.1).to(DEVICE) + print("Menggunakan Focal Loss.") + + # --- TAHAP 1: FREEZE BACKBONE, LATIH HEAD --- + print("\n--- TAHAP 1: Melatih Classifier Head ---") + for param in model.parameters(): + param.requires_grad = False + for param in model.head.parameters(): + param.requires_grad = True + + optimizer_head = optim.AdamW(model.head.parameters(), lr=LEARNING_RATE_HEAD, weight_decay=0.05) + + for epoch in range(5): + print(f"Epoch Head {epoch+1}/5") + train_one_epoch(model, train_loader, optimizer_head, criterion, DEVICE) + validate_one_epoch(model, valid_loader, criterion, DEVICE) + + # --- TAHAP 2: UNFREEZE & FINE-TUNE SELURUH MODEL --- + print("\n--- TAHAP 2: Fine-tuning Seluruh Model ---") + for param in model.parameters(): + param.requires_grad = True + + optimizer_finetune = optim.AdamW(model.parameters(), lr=LEARNING_RATE_FINETUNE, weight_decay=0.05) + scheduler = ReduceLROnPlateau(optimizer_finetune, mode='min', factor=0.2, 
patience=5, verbose=True) + + history = {'train_loss': [], 'train_acc': [], 'valid_loss': [], 'valid_acc': []} + best_valid_acc = 0.0 + best_labels, best_preds = None, None + + for epoch in range(EPOCHS): + print(f"Epoch {epoch+1}/{EPOCHS}") + + train_loss, train_acc = train_one_epoch(model, train_loader, optimizer_finetune, criterion, DEVICE) + valid_loss, valid_acc, valid_labels, valid_preds = validate_one_epoch(model, valid_loader, criterion, DEVICE) + + scheduler.step(valid_loss) + + print(f" Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}") + print(f" Valid Loss: {valid_loss:.4f}, Valid Acc: {valid_acc:.4f}") + + history['train_loss'].append(train_loss) + history['train_acc'].append(train_acc) + history['valid_loss'].append(valid_loss) + history['valid_acc'].append(valid_acc) + + if valid_acc > best_valid_acc: + print(f"Validasi akurasi meningkat dari {best_valid_acc:.4f} ke {valid_acc:.4f}. Menyimpan model...") + save_model(epoch, model, optimizer_finetune, criterion, f"{OUTPUT_DIR}/{MODEL_NAME}") + best_valid_acc = valid_acc + best_labels = valid_labels + best_preds = valid_preds + + save_plots(history['train_acc'], history['valid_acc'], history['train_loss'], history['valid_loss'], OUTPUT_DIR) + + if best_labels and best_preds: + save_confusion_matrix(best_labels, best_preds, classes, f"{OUTPUT_DIR}/confusion_matrix.png") + + print("\n--- Selesai ---") + print(f"Model terbaik disimpan di {OUTPUT_DIR}/{MODEL_NAME}") \ No newline at end of file diff --git a/.history/src/train_20250928065605.py b/.history/src/train_20250928065605.py new file mode 100644 index 0000000000000000000000000000000000000000..ee558cd9fbb84c05e89f427e04a1a67496d89bec --- /dev/null +++ b/.history/src/train_20250928065605.py @@ -0,0 +1,163 @@ +# src/train.py + +import torch +import torch.nn as nn +import torch.optim as optim +from tqdm import tqdm +import numpy as np +from sklearn.metrics import classification_report +from torch.utils.data import DataLoader, WeightedRandomSampler 
+ + +from model import create_model +from dataset import get_dataloaders +from utils import save_model, save_plots, save_confusion_matrix, FocalLoss +from torch.optim.lr_scheduler import ReduceLROnPlateau + +# --- 1. KONFIGURASI & HYPERPARAMETERS --- +DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu' +DATA_DIR = 'data' +OUTPUT_DIR = 'outputs/models' +IMAGE_SIZE = 224 +BATCH_SIZE = 8 +NUM_WORKERS = 4 +EPOCHS = 50 +LEARNING_RATE_HEAD = 1e-3 +LEARNING_RATE_FINETUNE = 3e-5 +WEIGHT_DECAY = 0.05 +MODEL_NAME = 'best_swin_model_final.pth' +NUM_CLASSES = 4 + +# --- 2. FUNGSI TRAINING & VALIDASI --- +def train_one_epoch(model, dataloader, optimizer, criterion, device): + model.train() + running_loss = 0.0 + correct_predictions = 0 + total_samples = 0 + + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Training"): + images, labels = images.to(device), labels.to(device) + optimizer.zero_grad() + outputs = model(images) + loss = criterion(outputs, labels) + loss.backward() + optimizer.step() + + running_loss += loss.item() * images.size(0) + _, preds = torch.max(outputs, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + return epoch_loss, epoch_acc + +def validate_one_epoch(model, dataloader, criterion, device): + model.eval() + running_loss = 0.0 + correct_predictions = 0 + total_samples = 0 + all_preds, all_labels = [], [] + + with torch.no_grad(): + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Validating"): + images, labels = images.to(device), labels.to(device) + outputs = model(images) + loss = criterion(outputs, labels) + + running_loss += loss.item() * images.size(0) + _, preds = torch.max(outputs, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + all_preds.extend(preds.cpu().numpy()) + 
all_labels.extend(labels.cpu().numpy()) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + + print("\n--- Laporan Klasifikasi Validasi ---") + print(classification_report(all_labels, all_preds, target_names=[str(i) for i in range(NUM_CLASSES)], zero_division=0)) + + return epoch_loss, epoch_acc, all_labels, all_preds + +# --- 3. SCRIPT UTAMA --- +if __name__ == '__main__': + _, valid_loader, classes, train_dataset = get_dataloaders(DATA_DIR, BATCH_SIZE, IMAGE_SIZE, NUM_WORKERS) + + print("\n--- Menyiapkan Balanced Sampler untuk Training ---") + class_counts = np.bincount(train_dataset.targets) + class_weights = 1. / torch.tensor(class_counts, dtype=torch.float) + sample_weights = class_weights[train_dataset.targets] + + sampler = WeightedRandomSampler(weights=sample_weights, num_samples=len(sample_weights), replacement=True) + + train_loader = DataLoader( + train_dataset, + batch_size=BATCH_SIZE, + sampler=sampler, + num_workers=NUM_WORKERS + ) + print("Balanced Sampler berhasil dibuat.") + + model = create_model(num_classes=NUM_CLASSES).to(DEVICE) + + criterion = nn.CrossEntropyLoss(label_smoothing=0.1).to(DEVICE) + print("Menggunakan Focal Loss.") + + # --- TAHAP 1: FREEZE BACKBONE, LATIH HEAD --- + print("\n--- TAHAP 1: Melatih Classifier Head ---") + for param in model.parameters(): + param.requires_grad = False + for param in model.head.parameters(): + param.requires_grad = True + + optimizer_head = optim.AdamW(model.head.parameters(), lr=LEARNING_RATE_HEAD, weight_decay=0.05) + + for epoch in range(5): + print(f"Epoch Head {epoch+1}/5") + train_one_epoch(model, train_loader, optimizer_head, criterion, DEVICE) + validate_one_epoch(model, valid_loader, criterion, DEVICE) + + # --- TAHAP 2: UNFREEZE & FINE-TUNE SELURUH MODEL --- + print("\n--- TAHAP 2: Fine-tuning Seluruh Model ---") + for param in model.parameters(): + param.requires_grad = True + + optimizer_finetune = 
optim.AdamW(model.parameters(), lr=LEARNING_RATE_FINETUNE, weight_decay=0.05) + scheduler = ReduceLROnPlateau(optimizer_finetune, mode='min', factor=0.2, patience=5, verbose=True) + + history = {'train_loss': [], 'train_acc': [], 'valid_loss': [], 'valid_acc': []} + best_valid_acc = 0.0 + best_labels, best_preds = None, None + + for epoch in range(EPOCHS): + print(f"Epoch {epoch+1}/{EPOCHS}") + + train_loss, train_acc = train_one_epoch(model, train_loader, optimizer_finetune, criterion, DEVICE) + valid_loss, valid_acc, valid_labels, valid_preds = validate_one_epoch(model, valid_loader, criterion, DEVICE) + + scheduler.step(valid_loss) + + print(f" Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}") + print(f" Valid Loss: {valid_loss:.4f}, Valid Acc: {valid_acc:.4f}") + + history['train_loss'].append(train_loss) + history['train_acc'].append(train_acc) + history['valid_loss'].append(valid_loss) + history['valid_acc'].append(valid_acc) + + if valid_acc > best_valid_acc: + print(f"Validasi akurasi meningkat dari {best_valid_acc:.4f} ke {valid_acc:.4f}. 
Menyimpan model...") + save_model(epoch, model, optimizer_finetune, criterion, f"{OUTPUT_DIR}/{MODEL_NAME}") + best_valid_acc = valid_acc + best_labels = valid_labels + best_preds = valid_preds + + save_plots(history['train_acc'], history['valid_acc'], history['train_loss'], history['valid_loss'], OUTPUT_DIR) + + if best_labels and best_preds: + save_confusion_matrix(best_labels, best_preds, classes, f"{OUTPUT_DIR}/confusion_matrix.png") + + print("\n--- Selesai ---") + print(f"Model terbaik disimpan di {OUTPUT_DIR}/{MODEL_NAME}") \ No newline at end of file diff --git a/.history/src/train_20250928065617.py b/.history/src/train_20250928065617.py new file mode 100644 index 0000000000000000000000000000000000000000..e79a4fa82125ace35e1ebd25f5ec4e591a8d2b9d --- /dev/null +++ b/.history/src/train_20250928065617.py @@ -0,0 +1,163 @@ +# src/train.py + +import torch +import torch.nn as nn +import torch.optim as optim +from tqdm import tqdm +import numpy as np +from sklearn.metrics import classification_report +from torch.utils.data import DataLoader, WeightedRandomSampler + + +from model import create_model +from dataset import get_dataloaders +from utils import save_model, save_plots, save_confusion_matrix, FocalLoss +from torch.optim.lr_scheduler import ReduceLROnPlateau + +# --- 1. KONFIGURASI & HYPERPARAMETERS --- +DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu' +DATA_DIR = 'data' +OUTPUT_DIR = 'outputs/models' +IMAGE_SIZE = 224 +BATCH_SIZE = 8 +NUM_WORKERS = 4 +EPOCHS = 50 +LEARNING_RATE_HEAD = 1e-3 +LEARNING_RATE_FINETUNE = 3e-5 +WEIGHT_DECAY = 0.05 +MODEL_NAME = 'best_swin_model_final.pth' +NUM_CLASSES = 4 + +# --- 2. 
FUNGSI TRAINING & VALIDASI --- +def train_one_epoch(model, dataloader, optimizer, criterion, device): + model.train() + running_loss = 0.0 + correct_predictions = 0 + total_samples = 0 + + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Training"): + images, labels = images.to(device), labels.to(device) + optimizer.zero_grad() + outputs = model(images) + loss = criterion(outputs, labels) + loss.backward() + optimizer.step() + + running_loss += loss.item() * images.size(0) + _, preds = torch.max(outputs, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + return epoch_loss, epoch_acc + +def validate_one_epoch(model, dataloader, criterion, device): + model.eval() + running_loss = 0.0 + correct_predictions = 0 + total_samples = 0 + all_preds, all_labels = [], [] + + with torch.no_grad(): + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Validating"): + images, labels = images.to(device), labels.to(device) + outputs = model(images) + loss = criterion(outputs, labels) + + running_loss += loss.item() * images.size(0) + _, preds = torch.max(outputs, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + all_preds.extend(preds.cpu().numpy()) + all_labels.extend(labels.cpu().numpy()) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + + print("\n--- Laporan Klasifikasi Validasi ---") + print(classification_report(all_labels, all_preds, target_names=[str(i) for i in range(NUM_CLASSES)], zero_division=0)) + + return epoch_loss, epoch_acc, all_labels, all_preds + +# --- 3. 
SCRIPT UTAMA --- +if __name__ == '__main__': + _, valid_loader, classes, train_dataset = get_dataloaders(DATA_DIR, BATCH_SIZE, IMAGE_SIZE, NUM_WORKERS) + + print("\n--- Menyiapkan Balanced Sampler untuk Training ---") + class_counts = np.bincount(train_dataset.targets) + class_weights = 1. / torch.tensor(class_counts, dtype=torch.float) + sample_weights = class_weights[train_dataset.targets] + + sampler = WeightedRandomSampler(weights=sample_weights, num_samples=len(sample_weights), replacement=True) + + train_loader = DataLoader( + train_dataset, + batch_size=BATCH_SIZE, + sampler=sampler, + num_workers=NUM_WORKERS + ) + print("Balanced Sampler berhasil dibuat.") + + model = create_model(num_classes=NUM_CLASSES).to(DEVICE) + + criterion = nn.CrossEntropyLoss(label_smoothing=0.1).to(DEVICE) + print("Menggunakan Focal Loss.") + + # --- TAHAP 1: FREEZE BACKBONE, LATIH HEAD --- + print("\n--- TAHAP 1: Melatih Classifier Head ---") + for param in model.parameters(): + param.requires_grad = False + for param in model.head.parameters(): + param.requires_grad = True + + optimizer_head = optim.AdamW(model.head.parameters(), lr=LEARNING_RATE_HEAD, weight_decay=WEIGHT_DECAY) + + for epoch in range(5): + print(f"Epoch Head {epoch+1}/5") + train_one_epoch(model, train_loader, optimizer_head, criterion, DEVICE) + validate_one_epoch(model, valid_loader, criterion, DEVICE) + + # --- TAHAP 2: UNFREEZE & FINE-TUNE SELURUH MODEL --- + print("\n--- TAHAP 2: Fine-tuning Seluruh Model ---") + for param in model.parameters(): + param.requires_grad = True + + optimizer_finetune = optim.AdamW(model.parameters(), lr=LEARNING_RATE_FINETUNE, weight_decay=0.05) + scheduler = ReduceLROnPlateau(optimizer_finetune, mode='min', factor=0.2, patience=5, verbose=True) + + history = {'train_loss': [], 'train_acc': [], 'valid_loss': [], 'valid_acc': []} + best_valid_acc = 0.0 + best_labels, best_preds = None, None + + for epoch in range(EPOCHS): + print(f"Epoch {epoch+1}/{EPOCHS}") + + train_loss, 
train_acc = train_one_epoch(model, train_loader, optimizer_finetune, criterion, DEVICE) + valid_loss, valid_acc, valid_labels, valid_preds = validate_one_epoch(model, valid_loader, criterion, DEVICE) + + scheduler.step(valid_loss) + + print(f" Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}") + print(f" Valid Loss: {valid_loss:.4f}, Valid Acc: {valid_acc:.4f}") + + history['train_loss'].append(train_loss) + history['train_acc'].append(train_acc) + history['valid_loss'].append(valid_loss) + history['valid_acc'].append(valid_acc) + + if valid_acc > best_valid_acc: + print(f"Validasi akurasi meningkat dari {best_valid_acc:.4f} ke {valid_acc:.4f}. Menyimpan model...") + save_model(epoch, model, optimizer_finetune, criterion, f"{OUTPUT_DIR}/{MODEL_NAME}") + best_valid_acc = valid_acc + best_labels = valid_labels + best_preds = valid_preds + + save_plots(history['train_acc'], history['valid_acc'], history['train_loss'], history['valid_loss'], OUTPUT_DIR) + + if best_labels and best_preds: + save_confusion_matrix(best_labels, best_preds, classes, f"{OUTPUT_DIR}/confusion_matrix.png") + + print("\n--- Selesai ---") + print(f"Model terbaik disimpan di {OUTPUT_DIR}/{MODEL_NAME}") \ No newline at end of file diff --git a/.history/src/train_20250928065621.py b/.history/src/train_20250928065621.py new file mode 100644 index 0000000000000000000000000000000000000000..49121b480b012cf57a45cdb5b7a525990a6c06d0 --- /dev/null +++ b/.history/src/train_20250928065621.py @@ -0,0 +1,163 @@ +# src/train.py + +import torch +import torch.nn as nn +import torch.optim as optim +from tqdm import tqdm +import numpy as np +from sklearn.metrics import classification_report +from torch.utils.data import DataLoader, WeightedRandomSampler + + +from model import create_model +from dataset import get_dataloaders +from utils import save_model, save_plots, save_confusion_matrix, FocalLoss +from torch.optim.lr_scheduler import ReduceLROnPlateau + +# --- 1. 
KONFIGURASI & HYPERPARAMETERS --- +DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu' +DATA_DIR = 'data' +OUTPUT_DIR = 'outputs/models' +IMAGE_SIZE = 224 +BATCH_SIZE = 8 +NUM_WORKERS = 4 +EPOCHS = 50 +LEARNING_RATE_HEAD = 1e-3 +LEARNING_RATE_FINETUNE = 3e-5 +WEIGHT_DECAY = 0.05 +MODEL_NAME = 'best_swin_model_final.pth' +NUM_CLASSES = 4 + +# --- 2. FUNGSI TRAINING & VALIDASI --- +def train_one_epoch(model, dataloader, optimizer, criterion, device): + model.train() + running_loss = 0.0 + correct_predictions = 0 + total_samples = 0 + + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Training"): + images, labels = images.to(device), labels.to(device) + optimizer.zero_grad() + outputs = model(images) + loss = criterion(outputs, labels) + loss.backward() + optimizer.step() + + running_loss += loss.item() * images.size(0) + _, preds = torch.max(outputs, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + return epoch_loss, epoch_acc + +def validate_one_epoch(model, dataloader, criterion, device): + model.eval() + running_loss = 0.0 + correct_predictions = 0 + total_samples = 0 + all_preds, all_labels = [], [] + + with torch.no_grad(): + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Validating"): + images, labels = images.to(device), labels.to(device) + outputs = model(images) + loss = criterion(outputs, labels) + + running_loss += loss.item() * images.size(0) + _, preds = torch.max(outputs, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + all_preds.extend(preds.cpu().numpy()) + all_labels.extend(labels.cpu().numpy()) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + + print("\n--- Laporan Klasifikasi Validasi ---") + print(classification_report(all_labels, 
all_preds, target_names=[str(i) for i in range(NUM_CLASSES)], zero_division=0)) + + return epoch_loss, epoch_acc, all_labels, all_preds + +# --- 3. SCRIPT UTAMA --- +if __name__ == '__main__': + _, valid_loader, classes, train_dataset = get_dataloaders(DATA_DIR, BATCH_SIZE, IMAGE_SIZE, NUM_WORKERS) + + print("\n--- Menyiapkan Balanced Sampler untuk Training ---") + class_counts = np.bincount(train_dataset.targets) + class_weights = 1. / torch.tensor(class_counts, dtype=torch.float) + sample_weights = class_weights[train_dataset.targets] + + sampler = WeightedRandomSampler(weights=sample_weights, num_samples=len(sample_weights), replacement=True) + + train_loader = DataLoader( + train_dataset, + batch_size=BATCH_SIZE, + sampler=sampler, + num_workers=NUM_WORKERS + ) + print("Balanced Sampler berhasil dibuat.") + + model = create_model(num_classes=NUM_CLASSES).to(DEVICE) + + criterion = nn.CrossEntropyLoss(label_smoothing=0.1).to(DEVICE) + print("Menggunakan Focal Loss.") + + # --- TAHAP 1: FREEZE BACKBONE, LATIH HEAD --- + print("\n--- TAHAP 1: Melatih Classifier Head ---") + for param in model.parameters(): + param.requires_grad = False + for param in model.head.parameters(): + param.requires_grad = True + + optimizer_head = optim.AdamW(model.head.parameters(), lr=LEARNING_RATE_HEAD, weight_decay=WEIGHT_DECAY) + + for epoch in range(5): + print(f"Epoch Head {epoch+1}/5") + train_one_epoch(model, train_loader, optimizer_head, criterion, DEVICE) + validate_one_epoch(model, valid_loader, criterion, DEVICE) + + # --- TAHAP 2: UNFREEZE & FINE-TUNE SELURUH MODEL --- + print("\n--- TAHAP 2: Fine-tuning Seluruh Model ---") + for param in model.parameters(): + param.requires_grad = True + + optimizer_finetune = optim.AdamW(model.parameters(), lr=LEARNING_RATE_FINETUNE, weight_decay=WEIGHT_DECAY) + scheduler = ReduceLROnPlateau(optimizer_finetune, mode='min', factor=0.2, patience=5, verbose=True) + + history = {'train_loss': [], 'train_acc': [], 'valid_loss': [], 
'valid_acc': []} + best_valid_acc = 0.0 + best_labels, best_preds = None, None + + for epoch in range(EPOCHS): + print(f"Epoch {epoch+1}/{EPOCHS}") + + train_loss, train_acc = train_one_epoch(model, train_loader, optimizer_finetune, criterion, DEVICE) + valid_loss, valid_acc, valid_labels, valid_preds = validate_one_epoch(model, valid_loader, criterion, DEVICE) + + scheduler.step(valid_loss) + + print(f" Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}") + print(f" Valid Loss: {valid_loss:.4f}, Valid Acc: {valid_acc:.4f}") + + history['train_loss'].append(train_loss) + history['train_acc'].append(train_acc) + history['valid_loss'].append(valid_loss) + history['valid_acc'].append(valid_acc) + + if valid_acc > best_valid_acc: + print(f"Validasi akurasi meningkat dari {best_valid_acc:.4f} ke {valid_acc:.4f}. Menyimpan model...") + save_model(epoch, model, optimizer_finetune, criterion, f"{OUTPUT_DIR}/{MODEL_NAME}") + best_valid_acc = valid_acc + best_labels = valid_labels + best_preds = valid_preds + + save_plots(history['train_acc'], history['valid_acc'], history['train_loss'], history['valid_loss'], OUTPUT_DIR) + + if best_labels and best_preds: + save_confusion_matrix(best_labels, best_preds, classes, f"{OUTPUT_DIR}/confusion_matrix.png") + + print("\n--- Selesai ---") + print(f"Model terbaik disimpan di {OUTPUT_DIR}/{MODEL_NAME}") \ No newline at end of file diff --git a/.history/src/train_20250928072137.py b/.history/src/train_20250928072137.py new file mode 100644 index 0000000000000000000000000000000000000000..216ea731754170948ebe1197a876d0776c77ae81 --- /dev/null +++ b/.history/src/train_20250928072137.py @@ -0,0 +1,164 @@ +# src/train.py + +import torch +import torch.nn as nn +import torch.optim as optim +from tqdm import tqdm +import numpy as np +from sklearn.metrics import classification_report +from torch.utils.data import DataLoader, WeightedRandomSampler + + +from model import create_model +from dataset import get_dataloaders +from utils import 
save_model, save_plots, save_confusion_matrix, FocalLoss +from torch.optim.lr_scheduler import ReduceLROnPlateau + +# --- 1. KONFIGURASI & HYPERPARAMETERS --- +DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu' +DATA_DIR = 'data' +OUTPUT_DIR = 'outputs/models' +IMAGE_SIZE = 224 +BATCH_SIZE = 8 +NUM_WORKERS = 4 +EPOCHS = 50 +LEARNING_RATE_HEAD = 1e-3 +LEARNING_RATE_FINETUNE = 3e-5 +WEIGHT_DECAY = 0.05 +MODEL_NAME = 'best_swin_model_final.pth' +NUM_CLASSES = 4 + +# --- 2. FUNGSI TRAINING & VALIDASI --- +def train_one_epoch(model, dataloader, optimizer, criterion, device): + model.train() + running_loss = 0.0 + correct_predictions = 0 + total_samples = 0 + + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Training"): + images, labels = images.to(device), labels.to(device) + optimizer.zero_grad() + outputs = model(images) + loss = criterion(outputs, labels) + loss.backward() + optimizer.step() + + running_loss += loss.item() * images.size(0) + _, preds = torch.max(outputs, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + return epoch_loss, epoch_acc + +def validate_one_epoch(model, dataloader, criterion, device): + model.eval() + running_loss = 0.0 + correct_predictions = 0 + total_samples = 0 + all_preds, all_labels = [], [] + + with torch.no_grad(): + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Validating"): + images, labels = images.to(device), labels.to(device) + outputs = model(images) + loss = criterion(outputs, labels) + + running_loss += loss.item() * images.size(0) + _, preds = torch.max(outputs, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + all_preds.extend(preds.cpu().numpy()) + all_labels.extend(labels.cpu().numpy()) + + epoch_loss = running_loss / total_samples + epoch_acc = 
(correct_predictions.double() / total_samples).item() + + print("\n--- Laporan Klasifikasi Validasi ---") + print(classification_report(all_labels, all_preds, target_names=[str(i) for i in range(NUM_CLASSES)], zero_division=0)) + + return epoch_loss, epoch_acc, all_labels, all_preds + +# --- 3. SCRIPT UTAMA --- +if __name__ == '__main__': + _, valid_loader, classes, train_dataset = get_dataloaders(DATA_DIR, BATCH_SIZE, IMAGE_SIZE, NUM_WORKERS) + + print("\n--- Menyiapkan Balanced Sampler untuk Training ---") + class_counts = np.bincount(train_dataset.targets) + class_weights = 1. / torch.tensor(class_counts, dtype=torch.float) + sample_weights = class_weights[train_dataset.targets] + + sampler = WeightedRandomSampler(weights=sample_weights, num_samples=len(sample_weights), replacement=True) + + train_loader = DataLoader( + train_dataset, + batch_size=BATCH_SIZE, + sampler=sampler, + num_workers=NUM_WORKERS + ) + print("Balanced Sampler berhasil dibuat.") + + model = create_model(num_classes=NUM_CLASSES).to(DEVICE) + + criterion = nn.CrossEntropyLoss(label_smoothing=0.1).to(DEVICE) + print("Menggunakan Focal Loss.") + + # --- TAHAP 1: FREEZE BACKBONE, LATIH HEAD --- + print("\n--- TAHAP 1: Melatih Classifier Head ---") + for param in model.parameters(): + param.requires_grad = False + for param in model.head.parameters(): + param.requires_grad = True + + optimizer_head = optim.AdamW(model.head.parameters(), lr=LEARNING_RATE_HEAD, weight_decay=WEIGHT_DECAY) + + for epoch in range(5): + print(f"Epoch Head {epoch+1}/5") + train_one_epoch(model, train_loader, optimizer_head, criterion, DEVICE) + validate_one_epoch(model, valid_loader, criterion, DEVICE) + + # --- TAHAP 2: UNFREEZE & FINE-TUNE SELURUH MODEL --- + print("\n--- TAHAP 2: Fine-tuning Seluruh Model ---") + for param in model.parameters(): + param.requires_grad = True + + optimizer_finetune = optim.AdamW(model.parameters(), lr=LEARNING_RATE_FINETUNE, weight_decay=WEIGHT_DECAY) + + scheduler = 
ReduceLROnPlateau(optimizer_finetune, mode='min', factor=0.2, patience=5, verbose=True) + + history = {'train_loss': [], 'train_acc': [], 'valid_loss': [], 'valid_acc': []} + best_valid_acc = 0.0 + best_labels, best_preds = None, None + + for epoch in range(EPOCHS): + print(f"Epoch {epoch+1}/{EPOCHS}") + + train_loss, train_acc = train_one_epoch(model, train_loader, optimizer_finetune, criterion, DEVICE) + valid_loss, valid_acc, valid_labels, valid_preds = validate_one_epoch(model, valid_loader, criterion, DEVICE) + + scheduler.step(valid_loss) + + print(f" Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}") + print(f" Valid Loss: {valid_loss:.4f}, Valid Acc: {valid_acc:.4f}") + + history['train_loss'].append(train_loss) + history['train_acc'].append(train_acc) + history['valid_loss'].append(valid_loss) + history['valid_acc'].append(valid_acc) + + if valid_acc > best_valid_acc: + print(f"Validasi akurasi meningkat dari {best_valid_acc:.4f} ke {valid_acc:.4f}. Menyimpan model...") + save_model(epoch, model, optimizer_finetune, criterion, f"{OUTPUT_DIR}/{MODEL_NAME}") + best_valid_acc = valid_acc + best_labels = valid_labels + best_preds = valid_preds + + save_plots(history['train_acc'], history['valid_acc'], history['train_loss'], history['valid_loss'], OUTPUT_DIR) + + if best_labels and best_preds: + save_confusion_matrix(best_labels, best_preds, classes, f"{OUTPUT_DIR}/confusion_matrix.png") + + print("\n--- Selesai ---") + print(f"Model terbaik disimpan di {OUTPUT_DIR}/{MODEL_NAME}") \ No newline at end of file diff --git a/.history/src/train_20250928072138.py b/.history/src/train_20250928072138.py new file mode 100644 index 0000000000000000000000000000000000000000..66da29ef2775dae9fe4e9898318426fbaa0e4220 --- /dev/null +++ b/.history/src/train_20250928072138.py @@ -0,0 +1,164 @@ +# src/train.py + +import torch +import torch.nn as nn +import torch.optim as optim +from tqdm import tqdm +import numpy as np +from sklearn.metrics import classification_report 
+from torch.utils.data import DataLoader, WeightedRandomSampler + + +from model import create_model +from dataset import get_dataloaders +from utils import save_model, save_plots, save_confusion_matrix, FocalLoss +from torch.optim.lr_scheduler import ReduceLROnPlateau + +# --- 1. KONFIGURASI & HYPERPARAMETERS --- +DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu' +DATA_DIR = 'data' +OUTPUT_DIR = 'outputs/models' +IMAGE_SIZE = 224 +BATCH_SIZE = 8 +NUM_WORKERS = 4 +EPOCHS = 50 +LEARNING_RATE_HEAD = 1e-3 +LEARNING_RATE_FINETUNE = 3e-5 +WEIGHT_DECAY = 0.05 +MODEL_NAME = 'best_swin_model_final.pth' +NUM_CLASSES = 4 + +# --- 2. FUNGSI TRAINING & VALIDASI --- +def train_one_epoch(model, dataloader, optimizer, criterion, device): + model.train() + running_loss = 0.0 + correct_predictions = 0 + total_samples = 0 + + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Training"): + images, labels = images.to(device), labels.to(device) + optimizer.zero_grad() + outputs = model(images) + loss = criterion(outputs, labels) + loss.backward() + optimizer.step() + + running_loss += loss.item() * images.size(0) + _, preds = torch.max(outputs, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + return epoch_loss, epoch_acc + +def validate_one_epoch(model, dataloader, criterion, device): + model.eval() + running_loss = 0.0 + correct_predictions = 0 + total_samples = 0 + all_preds, all_labels = [], [] + + with torch.no_grad(): + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Validating"): + images, labels = images.to(device), labels.to(device) + outputs = model(images) + loss = criterion(outputs, labels) + + running_loss += loss.item() * images.size(0) + _, preds = torch.max(outputs, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + 
all_preds.extend(preds.cpu().numpy()) + all_labels.extend(labels.cpu().numpy()) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + + print("\n--- Laporan Klasifikasi Validasi ---") + print(classification_report(all_labels, all_preds, target_names=[str(i) for i in range(NUM_CLASSES)], zero_division=0)) + + return epoch_loss, epoch_acc, all_labels, all_preds + +# --- 3. SCRIPT UTAMA --- +if __name__ == '__main__': + _, valid_loader, classes, train_dataset = get_dataloaders(DATA_DIR, BATCH_SIZE, IMAGE_SIZE, NUM_WORKERS) + + print("\n--- Menyiapkan Balanced Sampler untuk Training ---") + class_counts = np.bincount(train_dataset.targets) + class_weights = 1. / torch.tensor(class_counts, dtype=torch.float) + sample_weights = class_weights[train_dataset.targets] + + sampler = WeightedRandomSampler(weights=sample_weights, num_samples=len(sample_weights), replacement=True) + + train_loader = DataLoader( + train_dataset, + batch_size=BATCH_SIZE, + sampler=sampler, + num_workers=NUM_WORKERS + ) + print("Balanced Sampler berhasil dibuat.") + + model = create_model(num_classes=NUM_CLASSES).to(DEVICE) + + criterion = nn.CrossEntropyLoss(label_smoothing=0.1).to(DEVICE) + print("Menggunakan Focal Loss.") + + # --- TAHAP 1: FREEZE BACKBONE, LATIH HEAD --- + print("\n--- TAHAP 1: Melatih Classifier Head ---") + for param in model.parameters(): + param.requires_grad = False + for param in model.head.parameters(): + param.requires_grad = True + + optimizer_head = optim.AdamW(model.head.parameters(), lr=LEARNING_RATE_HEAD, weight_decay=WEIGHT_DECAY) + + for epoch in range(5): + print(f"Epoch Head {epoch+1}/5") + train_one_epoch(model, train_loader, optimizer_head, criterion, DEVICE) + validate_one_epoch(model, valid_loader, criterion, DEVICE) + + # --- TAHAP 2: UNFREEZE & FINE-TUNE SELURUH MODEL --- + print("\n--- TAHAP 2: Fine-tuning Seluruh Model ---") + for param in model.parameters(): + param.requires_grad = True + + 
optimizer_finetune = optim.AdamW(model.parameters(), lr=LEARNING_RATE_FINETUNE, weight_decay=WEIGHT_DECAY) + scheduler = ReduceLROnPlateau(optimizer_finetune, mode='min', factor=0.2, patience=5) + scheduler = ReduceLROnPlateau(optimizer_finetune, mode='min', factor=0.2, patience=5, verbose=True) + + history = {'train_loss': [], 'train_acc': [], 'valid_loss': [], 'valid_acc': []} + best_valid_acc = 0.0 + best_labels, best_preds = None, None + + for epoch in range(EPOCHS): + print(f"Epoch {epoch+1}/{EPOCHS}") + + train_loss, train_acc = train_one_epoch(model, train_loader, optimizer_finetune, criterion, DEVICE) + valid_loss, valid_acc, valid_labels, valid_preds = validate_one_epoch(model, valid_loader, criterion, DEVICE) + + scheduler.step(valid_loss) + + print(f" Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}") + print(f" Valid Loss: {valid_loss:.4f}, Valid Acc: {valid_acc:.4f}") + + history['train_loss'].append(train_loss) + history['train_acc'].append(train_acc) + history['valid_loss'].append(valid_loss) + history['valid_acc'].append(valid_acc) + + if valid_acc > best_valid_acc: + print(f"Validasi akurasi meningkat dari {best_valid_acc:.4f} ke {valid_acc:.4f}. 
Menyimpan model...") + save_model(epoch, model, optimizer_finetune, criterion, f"{OUTPUT_DIR}/{MODEL_NAME}") + best_valid_acc = valid_acc + best_labels = valid_labels + best_preds = valid_preds + + save_plots(history['train_acc'], history['valid_acc'], history['train_loss'], history['valid_loss'], OUTPUT_DIR) + + if best_labels and best_preds: + save_confusion_matrix(best_labels, best_preds, classes, f"{OUTPUT_DIR}/confusion_matrix.png") + + print("\n--- Selesai ---") + print(f"Model terbaik disimpan di {OUTPUT_DIR}/{MODEL_NAME}") \ No newline at end of file diff --git a/.history/src/train_20250928072144.py b/.history/src/train_20250928072144.py new file mode 100644 index 0000000000000000000000000000000000000000..448a30cc161a3cb968973f4c51af366d9435a926 --- /dev/null +++ b/.history/src/train_20250928072144.py @@ -0,0 +1,163 @@ +# src/train.py + +import torch +import torch.nn as nn +import torch.optim as optim +from tqdm import tqdm +import numpy as np +from sklearn.metrics import classification_report +from torch.utils.data import DataLoader, WeightedRandomSampler + + +from model import create_model +from dataset import get_dataloaders +from utils import save_model, save_plots, save_confusion_matrix, FocalLoss +from torch.optim.lr_scheduler import ReduceLROnPlateau + +# --- 1. KONFIGURASI & HYPERPARAMETERS --- +DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu' +DATA_DIR = 'data' +OUTPUT_DIR = 'outputs/models' +IMAGE_SIZE = 224 +BATCH_SIZE = 8 +NUM_WORKERS = 4 +EPOCHS = 50 +LEARNING_RATE_HEAD = 1e-3 +LEARNING_RATE_FINETUNE = 3e-5 +WEIGHT_DECAY = 0.05 +MODEL_NAME = 'best_swin_model_final.pth' +NUM_CLASSES = 4 + +# --- 2. 
FUNGSI TRAINING & VALIDASI --- +def train_one_epoch(model, dataloader, optimizer, criterion, device): + model.train() + running_loss = 0.0 + correct_predictions = 0 + total_samples = 0 + + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Training"): + images, labels = images.to(device), labels.to(device) + optimizer.zero_grad() + outputs = model(images) + loss = criterion(outputs, labels) + loss.backward() + optimizer.step() + + running_loss += loss.item() * images.size(0) + _, preds = torch.max(outputs, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + return epoch_loss, epoch_acc + +def validate_one_epoch(model, dataloader, criterion, device): + model.eval() + running_loss = 0.0 + correct_predictions = 0 + total_samples = 0 + all_preds, all_labels = [], [] + + with torch.no_grad(): + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Validating"): + images, labels = images.to(device), labels.to(device) + outputs = model(images) + loss = criterion(outputs, labels) + + running_loss += loss.item() * images.size(0) + _, preds = torch.max(outputs, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + all_preds.extend(preds.cpu().numpy()) + all_labels.extend(labels.cpu().numpy()) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + + print("\n--- Laporan Klasifikasi Validasi ---") + print(classification_report(all_labels, all_preds, target_names=[str(i) for i in range(NUM_CLASSES)], zero_division=0)) + + return epoch_loss, epoch_acc, all_labels, all_preds + +# --- 3. 
# MAIN SCRIPT ---
if __name__ == '__main__':
    # The first returned loader is discarded; the training loader is rebuilt
    # below around a weighted sampler.
    _, valid_loader, classes, train_dataset = get_dataloaders(DATA_DIR, BATCH_SIZE, IMAGE_SIZE, NUM_WORKERS)

    print("\n--- Menyiapkan Balanced Sampler untuk Training ---")
    # Weight every sample by the inverse frequency of its class so that
    # sampling with replacement draws classes at roughly equal rates.
    class_counts = np.bincount(train_dataset.targets)
    class_weights = 1. / torch.tensor(class_counts, dtype=torch.float)
    sample_weights = class_weights[train_dataset.targets]

    sampler = WeightedRandomSampler(weights=sample_weights, num_samples=len(sample_weights), replacement=True)

    train_loader = DataLoader(
        train_dataset,
        batch_size=BATCH_SIZE,
        sampler=sampler,
        num_workers=NUM_WORKERS
    )
    print("Balanced Sampler berhasil dibuat.")

    model = create_model(num_classes=NUM_CLASSES).to(DEVICE)

    criterion = nn.CrossEntropyLoss(label_smoothing=0.1).to(DEVICE)
    # The log line previously claimed Focal Loss although the criterion is
    # label-smoothed cross entropy; report what is actually used.
    print("Menggunakan CrossEntropyLoss dengan Label Smoothing (0.1).")

    # --- STAGE 1: FREEZE BACKBONE, TRAIN HEAD ---
    print("\n--- TAHAP 1: Melatih Classifier Head ---")
    for param in model.parameters():
        param.requires_grad = False
    for param in model.head.parameters():
        param.requires_grad = True

    optimizer_head = optim.AdamW(model.head.parameters(), lr=LEARNING_RATE_HEAD, weight_decay=WEIGHT_DECAY)

    for epoch in range(5):
        print(f"Epoch Head {epoch+1}/5")
        train_one_epoch(model, train_loader, optimizer_head, criterion, DEVICE)
        validate_one_epoch(model, valid_loader, criterion, DEVICE)

    # --- STAGE 2: UNFREEZE & FINE-TUNE THE WHOLE MODEL ---
    print("\n--- TAHAP 2: Fine-tuning Seluruh Model ---")
    for param in model.parameters():
        param.requires_grad = True

    optimizer_finetune = optim.AdamW(model.parameters(), lr=LEARNING_RATE_FINETUNE, weight_decay=WEIGHT_DECAY)
    # mode='min': the scheduler is meant to be stepped with a loss value.
    scheduler = ReduceLROnPlateau(optimizer_finetune, mode='min', factor=0.2, patience=5)

    history = {'train_loss': [], 'train_acc': [], 'valid_loss': [], 'valid_acc': []}
    best_valid_acc = 0.0
    best_labels, best_preds = None, None

    for epoch in range(EPOCHS):
        print(f"Epoch {epoch+1}/{EPOCHS}")

        # NOTE: the assignment below is completed on the following source line.
        train_loss, train_acc
= train_one_epoch(model, train_loader, optimizer_finetune, criterion, DEVICE) + valid_loss, valid_acc, valid_labels, valid_preds = validate_one_epoch(model, valid_loader, criterion, DEVICE) + + scheduler.step(valid_loss) + + print(f" Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}") + print(f" Valid Loss: {valid_loss:.4f}, Valid Acc: {valid_acc:.4f}") + + history['train_loss'].append(train_loss) + history['train_acc'].append(train_acc) + history['valid_loss'].append(valid_loss) + history['valid_acc'].append(valid_acc) + + if valid_acc > best_valid_acc: + print(f"Validasi akurasi meningkat dari {best_valid_acc:.4f} ke {valid_acc:.4f}. Menyimpan model...") + save_model(epoch, model, optimizer_finetune, criterion, f"{OUTPUT_DIR}/{MODEL_NAME}") + best_valid_acc = valid_acc + best_labels = valid_labels + best_preds = valid_preds + + save_plots(history['train_acc'], history['valid_acc'], history['train_loss'], history['valid_loss'], OUTPUT_DIR) + + if best_labels and best_preds: + save_confusion_matrix(best_labels, best_preds, classes, f"{OUTPUT_DIR}/confusion_matrix.png") + + print("\n--- Selesai ---") + print(f"Model terbaik disimpan di {OUTPUT_DIR}/{MODEL_NAME}") \ No newline at end of file diff --git a/.history/src/train_20250928085719.py b/.history/src/train_20250928085719.py new file mode 100644 index 0000000000000000000000000000000000000000..b8161343e3e640180eedef6869e56496e00e14f9 --- /dev/null +++ b/.history/src/train_20250928085719.py @@ -0,0 +1,163 @@ +# src/train.py + +import torch +import torch.nn as nn +import torch.optim as optim +from tqdm import tqdm +import numpy as np +from sklearn.metrics import classification_report +from torch.utils.data import DataLoader, WeightedRandomSampler + + +from model import create_model +from dataset import get_dataloaders +from utils import save_model, save_plots, save_confusion_matrix, FocalLoss +from torch.optim.lr_scheduler import ReduceLROnPlateau + +# --- 1. 
# CONFIGURATION & HYPERPARAMETERS ---
DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'
DATA_DIR = 'data'
OUTPUT_DIR = 'outputs/models'
IMAGE_SIZE = 224
BATCH_SIZE = 16
NUM_WORKERS = 4
EPOCHS = 50
LEARNING_RATE_HEAD = 1e-3
LEARNING_RATE_FINETUNE = 3e-5
WEIGHT_DECAY = 0.05
MODEL_NAME = 'best_swin_model_final.pth'
NUM_CLASSES = 4


# --- 2. TRAINING & VALIDATION FUNCTIONS ---
def train_one_epoch(model, dataloader, optimizer, criterion, device):
    """Run one optimization pass over ``dataloader``.

    Args:
        model: Network to train; put into training mode for the pass.
        dataloader: Iterable of ``(images, labels)`` batches.
        optimizer: Optimizer zeroed and stepped once per batch.
        criterion: Loss callable invoked as ``criterion(outputs, labels)``.
        device: Device every batch is moved to before the forward pass.

    Returns:
        ``(epoch_loss, epoch_acc)``: the per-sample average loss and the
        fraction of samples whose arg-max prediction equals the label.

    Raises:
        ValueError: If ``dataloader`` yields no samples.
    """
    model.train()
    running_loss = 0.0
    correct_predictions = 0
    total_samples = 0

    for images, labels in tqdm(dataloader, total=len(dataloader), desc="Training"):
        images, labels = images.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # Scale by batch size so the epoch figure is a per-sample average
        # (assumes ``criterion`` returns a batch mean).
        running_loss += loss.item() * images.size(0)
        _, preds = torch.max(outputs, 1)
        # Accumulate a plain int so the accuracy below never depends on the
        # counter having been turned into a tensor.
        correct_predictions += (preds == labels).sum().item()
        total_samples += labels.size(0)

    if total_samples == 0:
        raise ValueError("dataloader yielded no samples; cannot compute epoch metrics")

    epoch_loss = running_loss / total_samples
    epoch_acc = correct_predictions / total_samples
    return epoch_loss, epoch_acc


def validate_one_epoch(model, dataloader, criterion, device):
    """Evaluate ``model`` on ``dataloader`` and print a classification report.

    Args:
        model: Network to evaluate; put into eval mode.
        dataloader: Iterable of ``(images, labels)`` batches.
        criterion: Loss callable invoked as ``criterion(outputs, labels)``.
        device: Device every batch is moved to before the forward pass.

    Returns:
        ``(epoch_loss, epoch_acc, all_labels, all_preds)``: the per-sample
        average loss, the accuracy, and the flat lists of true and predicted
        class indices in iteration order.

    Raises:
        ValueError: If ``dataloader`` yields no samples.
    """
    model.eval()
    running_loss = 0.0
    correct_predictions = 0
    total_samples = 0
    all_preds, all_labels = [], []

    with torch.no_grad():
        for images, labels in tqdm(dataloader, total=len(dataloader), desc="Validating"):
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            loss = criterion(outputs, labels)

            running_loss += loss.item() * images.size(0)
            _, preds = torch.max(outputs, 1)
            correct_predictions += (preds == labels).sum().item()
            total_samples += labels.size(0)

            all_preds.extend(preds.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())

    if total_samples == 0:
        raise ValueError("dataloader yielded no samples; cannot compute epoch metrics")

    epoch_loss = running_loss / total_samples
    epoch_acc = correct_predictions / total_samples

    class_ids = list(range(NUM_CLASSES))
    print("\n--- Laporan Klasifikasi Validasi ---")
    # Pass ``labels`` explicitly: without it sklearn infers the class set from
    # the data and raises when a class is missing from both the labels and the
    # predictions, because the count no longer matches ``target_names``.
    print(classification_report(
        all_labels,
        all_preds,
        labels=class_ids,
        target_names=[str(i) for i in class_ids],
        zero_division=0,
    ))

    return epoch_loss, epoch_acc, all_labels, all_preds


# --- 3. MAIN SCRIPT ---
if __name__ == '__main__':
    # The first returned loader is discarded; the training loader is rebuilt
    # below around a weighted sampler.
    _, valid_loader, classes, train_dataset = get_dataloaders(DATA_DIR, BATCH_SIZE, IMAGE_SIZE, NUM_WORKERS)

    print("\n--- Menyiapkan Balanced Sampler untuk Training ---")
    # Weight every sample by the inverse frequency of its class so that
    # sampling with replacement draws classes at roughly equal rates.
    class_counts = np.bincount(train_dataset.targets)
    class_weights = 1. / torch.tensor(class_counts, dtype=torch.float)
    sample_weights = class_weights[train_dataset.targets]

    sampler = WeightedRandomSampler(weights=sample_weights, num_samples=len(sample_weights), replacement=True)

    train_loader = DataLoader(
        train_dataset,
        batch_size=BATCH_SIZE,
        sampler=sampler,
        num_workers=NUM_WORKERS
    )
    print("Balanced Sampler berhasil dibuat.")

    model = create_model(num_classes=NUM_CLASSES).to(DEVICE)

    criterion = nn.CrossEntropyLoss(label_smoothing=0.1).to(DEVICE)
    # The log line previously claimed Focal Loss although the criterion is
    # label-smoothed cross entropy; report what is actually used.
    print("Menggunakan CrossEntropyLoss dengan Label Smoothing (0.1).")

    # --- STAGE 1: FREEZE BACKBONE, TRAIN HEAD ---
    print("\n--- TAHAP 1: Melatih Classifier Head ---")
    for param in model.parameters():
        param.requires_grad = False
    for param in model.head.parameters():
        param.requires_grad = True

    optimizer_head = optim.AdamW(model.head.parameters(), lr=LEARNING_RATE_HEAD, weight_decay=WEIGHT_DECAY)

    for epoch in range(5):
        print(f"Epoch Head {epoch+1}/5")
        train_one_epoch(model, train_loader, optimizer_head, criterion, DEVICE)
        validate_one_epoch(model, valid_loader, criterion, DEVICE)

    # --- STAGE 2: UNFREEZE & FINE-TUNE THE WHOLE MODEL ---
    print("\n--- TAHAP 2: Fine-tuning Seluruh Model ---")
    for param in model.parameters():
        param.requires_grad = True

    optimizer_finetune = optim.AdamW(model.parameters(), lr=LEARNING_RATE_FINETUNE, weight_decay=WEIGHT_DECAY)
    # mode='min': the scheduler is meant to be stepped with a loss value.
    scheduler = ReduceLROnPlateau(optimizer_finetune, mode='min', factor=0.2, patience=5)

    history = {'train_loss': [], 'train_acc': [], 'valid_loss': [], 'valid_acc': []}
best_valid_acc = 0.0 + best_labels, best_preds = None, None + + for epoch in range(EPOCHS): + print(f"Epoch {epoch+1}/{EPOCHS}") + + train_loss, train_acc = train_one_epoch(model, train_loader, optimizer_finetune, criterion, DEVICE) + valid_loss, valid_acc, valid_labels, valid_preds = validate_one_epoch(model, valid_loader, criterion, DEVICE) + + scheduler.step(valid_loss) + + print(f" Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}") + print(f" Valid Loss: {valid_loss:.4f}, Valid Acc: {valid_acc:.4f}") + + history['train_loss'].append(train_loss) + history['train_acc'].append(train_acc) + history['valid_loss'].append(valid_loss) + history['valid_acc'].append(valid_acc) + + if valid_acc > best_valid_acc: + print(f"Validasi akurasi meningkat dari {best_valid_acc:.4f} ke {valid_acc:.4f}. Menyimpan model...") + save_model(epoch, model, optimizer_finetune, criterion, f"{OUTPUT_DIR}/{MODEL_NAME}") + best_valid_acc = valid_acc + best_labels = valid_labels + best_preds = valid_preds + + save_plots(history['train_acc'], history['valid_acc'], history['train_loss'], history['valid_loss'], OUTPUT_DIR) + + if best_labels and best_preds: + save_confusion_matrix(best_labels, best_preds, classes, f"{OUTPUT_DIR}/confusion_matrix.png") + + print("\n--- Selesai ---") + print(f"Model terbaik disimpan di {OUTPUT_DIR}/{MODEL_NAME}") \ No newline at end of file diff --git a/.history/src/train_20250928105552.py b/.history/src/train_20250928105552.py new file mode 100644 index 0000000000000000000000000000000000000000..44ed9006369246899ae75b86e5b6a7aa6a9a6a00 --- /dev/null +++ b/.history/src/train_20250928105552.py @@ -0,0 +1,167 @@ +# src/train.py (Versi Final dengan Test-Time Augmentation) + +import torch +import torch.nn as nn +import torch.optim as optim +from tqdm import tqdm +import numpy as np +from sklearn.metrics import classification_report +from torch.utils.data import DataLoader, WeightedRandomSampler +from torch.optim.lr_scheduler import ReduceLROnPlateau + +# Impor 
# ...from other files in the project
from model import create_model
from dataset import get_dataloaders
from utils import save_model, save_plots, save_confusion_matrix

# --- 1. CONFIGURATION & HYPERPARAMETERS ---
# These are the final parameters you can use
DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'
DATA_DIR = '../data'
OUTPUT_DIR = '../outputs'
IMAGE_SIZE = 224  # Try raising to 384 if VRAM allows; remember to lower BATCH_SIZE
BATCH_SIZE = 8  # Batch size that is safe for 4 GB of VRAM
NUM_WORKERS = 4  # Optimal for a 6-core CPU
EPOCHS = 50
LEARNING_RATE_HEAD = 1e-3
LEARNING_RATE_FINETUNE = 3e-5
WEIGHT_DECAY = 0.05  # Slightly stronger regularization
MODEL_NAME = 'best_model_final_TTA.pth'  # New file name for the final result
NUM_CLASSES = 4


# --- 2. TRAINING & VALIDATION FUNCTIONS ---
def train_one_epoch(model, dataloader, optimizer, criterion, device):
    """Run one optimization pass over ``dataloader``.

    Args:
        model: Network to train; put into training mode for the pass.
        dataloader: Iterable of ``(images, labels)`` batches.
        optimizer: Optimizer zeroed and stepped once per batch.
        criterion: Loss callable invoked as ``criterion(outputs, labels)``.
        device: Device every batch is moved to before the forward pass.

    Returns:
        ``(epoch_loss, epoch_acc)``: the per-sample average loss and the
        fraction of samples whose arg-max prediction equals the label.

    Raises:
        ValueError: If ``dataloader`` yields no samples.
    """
    model.train()
    running_loss = 0.0
    correct_predictions = 0
    total_samples = 0

    for images, labels in tqdm(dataloader, total=len(dataloader), desc="Training"):
        images, labels = images.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # Scale by batch size so the epoch figure is a per-sample average
        # (assumes ``criterion`` returns a batch mean).
        running_loss += loss.item() * images.size(0)
        _, preds = torch.max(outputs, 1)
        # Accumulate a plain int so the accuracy below never depends on the
        # counter having been turned into a tensor.
        correct_predictions += (preds == labels).sum().item()
        total_samples += labels.size(0)

    if total_samples == 0:
        raise ValueError("dataloader yielded no samples; cannot compute epoch metrics")

    epoch_loss = running_loss / total_samples
    epoch_acc = correct_predictions / total_samples
    return epoch_loss, epoch_acc


def validate_one_epoch_tta(model, dataloader, criterion, device):
    """Evaluate ``model`` with flip-based test-time augmentation (TTA).

    Each batch is predicted twice, once as given and once flipped along its
    last axis, and the two softmax distributions are averaged before the
    arg-max and the loss are taken.

    Args:
        model: Network to evaluate; put into eval mode.
        dataloader: Iterable of ``(images, labels)`` batches.
        criterion: Loss callable that expects logits or log-probabilities
            (the script passes ``nn.CrossEntropyLoss``).
        device: Device every batch is moved to before the forward pass.

    Returns:
        ``(epoch_loss, epoch_acc, all_labels, all_preds)``: the per-sample
        average loss, the accuracy, and the flat lists of true and predicted
        class indices in iteration order.

    Raises:
        ValueError: If ``dataloader`` yields no samples.
    """
    model.eval()
    running_loss = 0.0
    correct_predictions = 0
    total_samples = 0
    all_preds, all_labels = [], []

    with torch.no_grad():
        for images, labels in tqdm(dataloader, total=len(dataloader), desc="Validating with TTA"):
            images, labels = images.to(device), labels.to(device)

            # TTA: predict the original batch and its flip along dim 3
            # (a horizontal flip, assuming N x C x H x W batches).
            outputs_original = model(images)
            outputs_flipped = model(torch.flip(images, dims=[3]))

            # Average the predicted probabilities of the two views.
            outputs_avg = (torch.softmax(outputs_original, dim=1) + torch.softmax(outputs_flipped, dim=1)) / 2

            # The criterion applies log_softmax itself, so feeding it raw
            # probabilities would apply softmax twice and flatten the loss
            # that the LR scheduler watches. Log-probabilities are invariant
            # under log_softmax (the probabilities sum to 1), so this gives
            # the true cross entropy of the averaged distribution. The clamp
            # keeps log() finite for probabilities that underflow to 0.
            tiny = torch.finfo(outputs_avg.dtype).tiny
            loss = criterion(torch.log(outputs_avg.clamp_min(tiny)), labels)

            running_loss += loss.item() * images.size(0)
            _, preds = torch.max(outputs_avg, 1)
            correct_predictions += (preds == labels).sum().item()
            total_samples += labels.size(0)

            all_preds.extend(preds.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())

    if total_samples == 0:
        raise ValueError("dataloader yielded no samples; cannot compute epoch metrics")

    epoch_loss = running_loss / total_samples
    epoch_acc = correct_predictions / total_samples

    class_ids = list(range(NUM_CLASSES))
    print("\n--- Laporan Klasifikasi Validasi (dengan TTA) ---")
    # Pass ``labels`` explicitly: without it sklearn infers the class set from
    # the data and raises when a class is missing from both the labels and the
    # predictions, because the count no longer matches ``target_names``.
    print(classification_report(
        all_labels,
        all_preds,
        labels=class_ids,
        target_names=[str(i) for i in class_ids],
        zero_division=0,
    ))

    return epoch_loss, epoch_acc, all_labels, all_preds


# --- 3. MAIN SCRIPT ---
if __name__ == '__main__':
    # The first returned loader is discarded; the training loader is rebuilt
    # further down around a weighted sampler.
    _, valid_loader, classes, train_dataset = get_dataloaders(DATA_DIR, BATCH_SIZE, IMAGE_SIZE, NUM_WORKERS)

    print("\n--- Menyiapkan Balanced Sampler untuk Training ---")
    class_counts = np.bincount(train_dataset.targets)
    # NOTE: the expression below is completed on the following source line.
    class_weights = 1.
/ torch.tensor(class_counts, dtype=torch.float) + sample_weights = class_weights[train_dataset.targets] + sampler = WeightedRandomSampler(weights=sample_weights, num_samples=len(sample_weights), replacement=True) + train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, sampler=sampler, num_workers=NUM_WORKERS) + + model = create_model(num_classes=NUM_CLASSES).to(DEVICE) + + # Menggunakan CrossEntropyLoss dengan Label Smoothing untuk regularisasi + criterion = nn.CrossEntropyLoss(label_smoothing=0.1).to(DEVICE) + print("Menggunakan CrossEntropyLoss dengan Label Smoothing (0.1).") + + # --- TAHAP 1: FREEZE BACKBONE, LATIH HEAD --- + print("\n--- TAHAP 1: Melatih Classifier Head ---") + for param in model.parameters(): + param.requires_grad = False + for param in model.head.parameters(): + param.requires_grad = True + optimizer_head = optim.AdamW(model.head.parameters(), lr=LEARNING_RATE_HEAD, weight_decay=WEIGHT_DECAY) + for epoch in range(5): + print(f"Epoch Head {epoch+1}/5") + train_one_epoch(model, train_loader, optimizer_head, criterion, DEVICE) + validate_one_epoch_tta(model, valid_loader, criterion, DEVICE) + + # --- TAHAP 2: UNFREEZE & FINE-TUNE SELURUH MODEL --- + print("\n--- TAHAP 2: Fine-tuning Seluruh Model ---") + for param in model.parameters(): + param.requires_grad = True + optimizer_finetune = optim.AdamW(model.parameters(), lr=LEARNING_RATE_FINETUNE, weight_decay=WEIGHT_DECAY) + scheduler = ReduceLROnPlateau(optimizer_finetune, mode='min', factor=0.2, patience=5) + print("Menggunakan scheduler ReduceLROnPlateau.") + + history = {'train_loss': [], 'train_acc': [], 'valid_loss': [], 'valid_acc': []} + best_valid_acc = 0.0 + best_labels, best_preds = None, None + + for epoch in range(EPOCHS): + print(f"Epoch {epoch+1}/{EPOCHS}") + + train_loss, train_acc = train_one_epoch(model, train_loader, optimizer_finetune, criterion, DEVICE) + valid_loss, valid_acc, valid_labels, valid_preds = validate_one_epoch_tta(model, valid_loader, criterion, 
DEVICE) + + scheduler.step(valid_loss) + + print(f" Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}") + print(f" Valid Loss: {valid_loss:.4f}, Valid Acc: {valid_acc:.4f}") + + history['train_loss'].append(train_loss) + history['train_acc'].append(train_acc) + history['valid_loss'].append(valid_loss) + history['valid_acc'].append(valid_acc) + + if valid_acc > best_valid_acc: + print(f"Validasi akurasi meningkat dari {best_valid_acc:.4f} ke {valid_acc:.4f}. Menyimpan model...") + save_model(epoch, model, optimizer_finetune, criterion, f"{OUTPUT_DIR}/{MODEL_NAME}") + best_valid_acc = valid_acc + best_labels = valid_labels + best_preds = valid_preds + + save_plots(history['train_acc'], history['valid_acc'], history['train_loss'], history['valid_loss'], OUTPUT_DIR) + + if best_labels and best_preds: + save_confusion_matrix(best_labels, best_preds, classes, f"{OUTPUT_DIR}/confusion_matrix.png") + + print("\n--- Selesai ---") + print(f"Model terbaik disimpan di {OUTPUT_DIR}/{MODEL_NAME}") \ No newline at end of file diff --git a/.history/src/train_20250928105557.py b/.history/src/train_20250928105557.py new file mode 100644 index 0000000000000000000000000000000000000000..b8161343e3e640180eedef6869e56496e00e14f9 --- /dev/null +++ b/.history/src/train_20250928105557.py @@ -0,0 +1,163 @@ +# src/train.py + +import torch +import torch.nn as nn +import torch.optim as optim +from tqdm import tqdm +import numpy as np +from sklearn.metrics import classification_report +from torch.utils.data import DataLoader, WeightedRandomSampler + + +from model import create_model +from dataset import get_dataloaders +from utils import save_model, save_plots, save_confusion_matrix, FocalLoss +from torch.optim.lr_scheduler import ReduceLROnPlateau + +# --- 1. 
# CONFIGURATION & HYPERPARAMETERS ---
DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'
DATA_DIR = 'data'
OUTPUT_DIR = 'outputs/models'
IMAGE_SIZE = 224
BATCH_SIZE = 16
NUM_WORKERS = 4
EPOCHS = 50
LEARNING_RATE_HEAD = 1e-3
LEARNING_RATE_FINETUNE = 3e-5
WEIGHT_DECAY = 0.05
MODEL_NAME = 'best_swin_model_final.pth'
NUM_CLASSES = 4


# --- 2. TRAINING & VALIDATION FUNCTIONS ---
def train_one_epoch(model, dataloader, optimizer, criterion, device):
    """Run one optimization pass over ``dataloader``.

    Args:
        model: Network to train; put into training mode for the pass.
        dataloader: Iterable of ``(images, labels)`` batches.
        optimizer: Optimizer zeroed and stepped once per batch.
        criterion: Loss callable invoked as ``criterion(outputs, labels)``.
        device: Device every batch is moved to before the forward pass.

    Returns:
        ``(epoch_loss, epoch_acc)``: the per-sample average loss and the
        fraction of samples whose arg-max prediction equals the label.

    Raises:
        ValueError: If ``dataloader`` yields no samples.
    """
    model.train()
    running_loss = 0.0
    correct_predictions = 0
    total_samples = 0

    for images, labels in tqdm(dataloader, total=len(dataloader), desc="Training"):
        images, labels = images.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # Scale by batch size so the epoch figure is a per-sample average
        # (assumes ``criterion`` returns a batch mean).
        running_loss += loss.item() * images.size(0)
        _, preds = torch.max(outputs, 1)
        # Accumulate a plain int so the accuracy below never depends on the
        # counter having been turned into a tensor.
        correct_predictions += (preds == labels).sum().item()
        total_samples += labels.size(0)

    if total_samples == 0:
        raise ValueError("dataloader yielded no samples; cannot compute epoch metrics")

    epoch_loss = running_loss / total_samples
    epoch_acc = correct_predictions / total_samples
    return epoch_loss, epoch_acc


def validate_one_epoch(model, dataloader, criterion, device):
    """Evaluate ``model`` on ``dataloader`` and print a classification report.

    Args:
        model: Network to evaluate; put into eval mode.
        dataloader: Iterable of ``(images, labels)`` batches.
        criterion: Loss callable invoked as ``criterion(outputs, labels)``.
        device: Device every batch is moved to before the forward pass.

    Returns:
        ``(epoch_loss, epoch_acc, all_labels, all_preds)``: the per-sample
        average loss, the accuracy, and the flat lists of true and predicted
        class indices in iteration order.

    Raises:
        ValueError: If ``dataloader`` yields no samples.
    """
    model.eval()
    running_loss = 0.0
    correct_predictions = 0
    total_samples = 0
    all_preds, all_labels = [], []

    with torch.no_grad():
        for images, labels in tqdm(dataloader, total=len(dataloader), desc="Validating"):
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            loss = criterion(outputs, labels)

            running_loss += loss.item() * images.size(0)
            _, preds = torch.max(outputs, 1)
            correct_predictions += (preds == labels).sum().item()
            total_samples += labels.size(0)

            all_preds.extend(preds.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())

    if total_samples == 0:
        raise ValueError("dataloader yielded no samples; cannot compute epoch metrics")

    epoch_loss = running_loss / total_samples
    epoch_acc = correct_predictions / total_samples

    class_ids = list(range(NUM_CLASSES))
    print("\n--- Laporan Klasifikasi Validasi ---")
    # Pass ``labels`` explicitly: without it sklearn infers the class set from
    # the data and raises when a class is missing from both the labels and the
    # predictions, because the count no longer matches ``target_names``.
    print(classification_report(
        all_labels,
        all_preds,
        labels=class_ids,
        target_names=[str(i) for i in class_ids],
        zero_division=0,
    ))

    return epoch_loss, epoch_acc, all_labels, all_preds


# --- 3. MAIN SCRIPT ---
if __name__ == '__main__':
    # The first returned loader is discarded; the training loader is rebuilt
    # below around a weighted sampler.
    _, valid_loader, classes, train_dataset = get_dataloaders(DATA_DIR, BATCH_SIZE, IMAGE_SIZE, NUM_WORKERS)

    print("\n--- Menyiapkan Balanced Sampler untuk Training ---")
    # Weight every sample by the inverse frequency of its class so that
    # sampling with replacement draws classes at roughly equal rates.
    class_counts = np.bincount(train_dataset.targets)
    class_weights = 1. / torch.tensor(class_counts, dtype=torch.float)
    sample_weights = class_weights[train_dataset.targets]

    sampler = WeightedRandomSampler(weights=sample_weights, num_samples=len(sample_weights), replacement=True)

    train_loader = DataLoader(
        train_dataset,
        batch_size=BATCH_SIZE,
        sampler=sampler,
        num_workers=NUM_WORKERS
    )
    print("Balanced Sampler berhasil dibuat.")

    model = create_model(num_classes=NUM_CLASSES).to(DEVICE)

    criterion = nn.CrossEntropyLoss(label_smoothing=0.1).to(DEVICE)
    # The log line previously claimed Focal Loss although the criterion is
    # label-smoothed cross entropy; report what is actually used.
    print("Menggunakan CrossEntropyLoss dengan Label Smoothing (0.1).")

    # --- STAGE 1: FREEZE BACKBONE, TRAIN HEAD ---
    print("\n--- TAHAP 1: Melatih Classifier Head ---")
    for param in model.parameters():
        param.requires_grad = False
    for param in model.head.parameters():
        param.requires_grad = True

    optimizer_head = optim.AdamW(model.head.parameters(), lr=LEARNING_RATE_HEAD, weight_decay=WEIGHT_DECAY)

    for epoch in range(5):
        print(f"Epoch Head {epoch+1}/5")
        train_one_epoch(model, train_loader, optimizer_head, criterion, DEVICE)
        validate_one_epoch(model, valid_loader, criterion, DEVICE)

    # --- STAGE 2: UNFREEZE & FINE-TUNE THE WHOLE MODEL ---
    print("\n--- TAHAP 2: Fine-tuning Seluruh Model ---")
    for param in model.parameters():
        param.requires_grad = True

    optimizer_finetune = optim.AdamW(model.parameters(), lr=LEARNING_RATE_FINETUNE, weight_decay=WEIGHT_DECAY)
    # mode='min': the scheduler is meant to be stepped with a loss value.
    scheduler = ReduceLROnPlateau(optimizer_finetune, mode='min', factor=0.2, patience=5)

    history = {'train_loss': [], 'train_acc': [], 'valid_loss': [], 'valid_acc': []}
best_valid_acc = 0.0 + best_labels, best_preds = None, None + + for epoch in range(EPOCHS): + print(f"Epoch {epoch+1}/{EPOCHS}") + + train_loss, train_acc = train_one_epoch(model, train_loader, optimizer_finetune, criterion, DEVICE) + valid_loss, valid_acc, valid_labels, valid_preds = validate_one_epoch(model, valid_loader, criterion, DEVICE) + + scheduler.step(valid_loss) + + print(f" Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}") + print(f" Valid Loss: {valid_loss:.4f}, Valid Acc: {valid_acc:.4f}") + + history['train_loss'].append(train_loss) + history['train_acc'].append(train_acc) + history['valid_loss'].append(valid_loss) + history['valid_acc'].append(valid_acc) + + if valid_acc > best_valid_acc: + print(f"Validasi akurasi meningkat dari {best_valid_acc:.4f} ke {valid_acc:.4f}. Menyimpan model...") + save_model(epoch, model, optimizer_finetune, criterion, f"{OUTPUT_DIR}/{MODEL_NAME}") + best_valid_acc = valid_acc + best_labels = valid_labels + best_preds = valid_preds + + save_plots(history['train_acc'], history['valid_acc'], history['train_loss'], history['valid_loss'], OUTPUT_DIR) + + if best_labels and best_preds: + save_confusion_matrix(best_labels, best_preds, classes, f"{OUTPUT_DIR}/confusion_matrix.png") + + print("\n--- Selesai ---") + print(f"Model terbaik disimpan di {OUTPUT_DIR}/{MODEL_NAME}") \ No newline at end of file diff --git a/.history/src/train_20250928105602.py b/.history/src/train_20250928105602.py new file mode 100644 index 0000000000000000000000000000000000000000..44ed9006369246899ae75b86e5b6a7aa6a9a6a00 --- /dev/null +++ b/.history/src/train_20250928105602.py @@ -0,0 +1,167 @@ +# src/train.py (Versi Final dengan Test-Time Augmentation) + +import torch +import torch.nn as nn +import torch.optim as optim +from tqdm import tqdm +import numpy as np +from sklearn.metrics import classification_report +from torch.utils.data import DataLoader, WeightedRandomSampler +from torch.optim.lr_scheduler import ReduceLROnPlateau + +# Impor 
# ...from other files in the project
from model import create_model
from dataset import get_dataloaders
from utils import save_model, save_plots, save_confusion_matrix

# --- 1. CONFIGURATION & HYPERPARAMETERS ---
# These are the final parameters you can use
DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'
DATA_DIR = '../data'
OUTPUT_DIR = '../outputs'
IMAGE_SIZE = 224  # Try raising to 384 if VRAM allows; remember to lower BATCH_SIZE
BATCH_SIZE = 8  # Batch size that is safe for 4 GB of VRAM
NUM_WORKERS = 4  # Optimal for a 6-core CPU
EPOCHS = 50
LEARNING_RATE_HEAD = 1e-3
LEARNING_RATE_FINETUNE = 3e-5
WEIGHT_DECAY = 0.05  # Slightly stronger regularization
MODEL_NAME = 'best_model_final_TTA.pth'  # New file name for the final result
NUM_CLASSES = 4


# --- 2. TRAINING & VALIDATION FUNCTIONS ---
def train_one_epoch(model, dataloader, optimizer, criterion, device):
    """Run one optimization pass over ``dataloader``.

    Args:
        model: Network to train; put into training mode for the pass.
        dataloader: Iterable of ``(images, labels)`` batches.
        optimizer: Optimizer zeroed and stepped once per batch.
        criterion: Loss callable invoked as ``criterion(outputs, labels)``.
        device: Device every batch is moved to before the forward pass.

    Returns:
        ``(epoch_loss, epoch_acc)``: the per-sample average loss and the
        fraction of samples whose arg-max prediction equals the label.

    Raises:
        ValueError: If ``dataloader`` yields no samples.
    """
    model.train()
    running_loss = 0.0
    correct_predictions = 0
    total_samples = 0

    for images, labels in tqdm(dataloader, total=len(dataloader), desc="Training"):
        images, labels = images.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # Scale by batch size so the epoch figure is a per-sample average
        # (assumes ``criterion`` returns a batch mean).
        running_loss += loss.item() * images.size(0)
        _, preds = torch.max(outputs, 1)
        # Accumulate a plain int so the accuracy below never depends on the
        # counter having been turned into a tensor.
        correct_predictions += (preds == labels).sum().item()
        total_samples += labels.size(0)

    if total_samples == 0:
        raise ValueError("dataloader yielded no samples; cannot compute epoch metrics")

    epoch_loss = running_loss / total_samples
    epoch_acc = correct_predictions / total_samples
    return epoch_loss, epoch_acc


def validate_one_epoch_tta(model, dataloader, criterion, device):
    """Evaluate ``model`` with flip-based test-time augmentation (TTA).

    Each batch is predicted twice, once as given and once flipped along its
    last axis, and the two softmax distributions are averaged before the
    arg-max and the loss are taken.

    Args:
        model: Network to evaluate; put into eval mode.
        dataloader: Iterable of ``(images, labels)`` batches.
        criterion: Loss callable that expects logits or log-probabilities
            (the script passes ``nn.CrossEntropyLoss``).
        device: Device every batch is moved to before the forward pass.

    Returns:
        ``(epoch_loss, epoch_acc, all_labels, all_preds)``: the per-sample
        average loss, the accuracy, and the flat lists of true and predicted
        class indices in iteration order.

    Raises:
        ValueError: If ``dataloader`` yields no samples.
    """
    model.eval()
    running_loss = 0.0
    correct_predictions = 0
    total_samples = 0
    all_preds, all_labels = [], []

    with torch.no_grad():
        for images, labels in tqdm(dataloader, total=len(dataloader), desc="Validating with TTA"):
            images, labels = images.to(device), labels.to(device)

            # TTA: predict the original batch and its flip along dim 3
            # (a horizontal flip, assuming N x C x H x W batches).
            outputs_original = model(images)
            outputs_flipped = model(torch.flip(images, dims=[3]))

            # Average the predicted probabilities of the two views.
            outputs_avg = (torch.softmax(outputs_original, dim=1) + torch.softmax(outputs_flipped, dim=1)) / 2

            # The criterion applies log_softmax itself, so feeding it raw
            # probabilities would apply softmax twice and flatten the loss
            # that the LR scheduler watches. Log-probabilities are invariant
            # under log_softmax (the probabilities sum to 1), so this gives
            # the true cross entropy of the averaged distribution. The clamp
            # keeps log() finite for probabilities that underflow to 0.
            tiny = torch.finfo(outputs_avg.dtype).tiny
            loss = criterion(torch.log(outputs_avg.clamp_min(tiny)), labels)

            running_loss += loss.item() * images.size(0)
            _, preds = torch.max(outputs_avg, 1)
            correct_predictions += (preds == labels).sum().item()
            total_samples += labels.size(0)

            all_preds.extend(preds.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())

    if total_samples == 0:
        raise ValueError("dataloader yielded no samples; cannot compute epoch metrics")

    epoch_loss = running_loss / total_samples
    epoch_acc = correct_predictions / total_samples

    class_ids = list(range(NUM_CLASSES))
    print("\n--- Laporan Klasifikasi Validasi (dengan TTA) ---")
    # Pass ``labels`` explicitly: without it sklearn infers the class set from
    # the data and raises when a class is missing from both the labels and the
    # predictions, because the count no longer matches ``target_names``.
    print(classification_report(
        all_labels,
        all_preds,
        labels=class_ids,
        target_names=[str(i) for i in class_ids],
        zero_division=0,
    ))

    return epoch_loss, epoch_acc, all_labels, all_preds


# --- 3. MAIN SCRIPT ---
if __name__ == '__main__':
    # The first returned loader is discarded; the training loader is rebuilt
    # further down around a weighted sampler.
    _, valid_loader, classes, train_dataset = get_dataloaders(DATA_DIR, BATCH_SIZE, IMAGE_SIZE, NUM_WORKERS)

    print("\n--- Menyiapkan Balanced Sampler untuk Training ---")
    class_counts = np.bincount(train_dataset.targets)
    # NOTE: the expression below is completed on the following source line.
    class_weights = 1.
/ torch.tensor(class_counts, dtype=torch.float) + sample_weights = class_weights[train_dataset.targets] + sampler = WeightedRandomSampler(weights=sample_weights, num_samples=len(sample_weights), replacement=True) + train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, sampler=sampler, num_workers=NUM_WORKERS) + + model = create_model(num_classes=NUM_CLASSES).to(DEVICE) + + # Menggunakan CrossEntropyLoss dengan Label Smoothing untuk regularisasi + criterion = nn.CrossEntropyLoss(label_smoothing=0.1).to(DEVICE) + print("Menggunakan CrossEntropyLoss dengan Label Smoothing (0.1).") + + # --- TAHAP 1: FREEZE BACKBONE, LATIH HEAD --- + print("\n--- TAHAP 1: Melatih Classifier Head ---") + for param in model.parameters(): + param.requires_grad = False + for param in model.head.parameters(): + param.requires_grad = True + optimizer_head = optim.AdamW(model.head.parameters(), lr=LEARNING_RATE_HEAD, weight_decay=WEIGHT_DECAY) + for epoch in range(5): + print(f"Epoch Head {epoch+1}/5") + train_one_epoch(model, train_loader, optimizer_head, criterion, DEVICE) + validate_one_epoch_tta(model, valid_loader, criterion, DEVICE) + + # --- TAHAP 2: UNFREEZE & FINE-TUNE SELURUH MODEL --- + print("\n--- TAHAP 2: Fine-tuning Seluruh Model ---") + for param in model.parameters(): + param.requires_grad = True + optimizer_finetune = optim.AdamW(model.parameters(), lr=LEARNING_RATE_FINETUNE, weight_decay=WEIGHT_DECAY) + scheduler = ReduceLROnPlateau(optimizer_finetune, mode='min', factor=0.2, patience=5) + print("Menggunakan scheduler ReduceLROnPlateau.") + + history = {'train_loss': [], 'train_acc': [], 'valid_loss': [], 'valid_acc': []} + best_valid_acc = 0.0 + best_labels, best_preds = None, None + + for epoch in range(EPOCHS): + print(f"Epoch {epoch+1}/{EPOCHS}") + + train_loss, train_acc = train_one_epoch(model, train_loader, optimizer_finetune, criterion, DEVICE) + valid_loss, valid_acc, valid_labels, valid_preds = validate_one_epoch_tta(model, valid_loader, criterion, 
DEVICE) + + scheduler.step(valid_loss) + + print(f" Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}") + print(f" Valid Loss: {valid_loss:.4f}, Valid Acc: {valid_acc:.4f}") + + history['train_loss'].append(train_loss) + history['train_acc'].append(train_acc) + history['valid_loss'].append(valid_loss) + history['valid_acc'].append(valid_acc) + + if valid_acc > best_valid_acc: + print(f"Validasi akurasi meningkat dari {best_valid_acc:.4f} ke {valid_acc:.4f}. Menyimpan model...") + save_model(epoch, model, optimizer_finetune, criterion, f"{OUTPUT_DIR}/{MODEL_NAME}") + best_valid_acc = valid_acc + best_labels = valid_labels + best_preds = valid_preds + + save_plots(history['train_acc'], history['valid_acc'], history['train_loss'], history['valid_loss'], OUTPUT_DIR) + + if best_labels and best_preds: + save_confusion_matrix(best_labels, best_preds, classes, f"{OUTPUT_DIR}/confusion_matrix.png") + + print("\n--- Selesai ---") + print(f"Model terbaik disimpan di {OUTPUT_DIR}/{MODEL_NAME}") \ No newline at end of file diff --git a/.history/src/train_20250928105609.py b/.history/src/train_20250928105609.py new file mode 100644 index 0000000000000000000000000000000000000000..3d3288c0975f9e90d73776ace17c3eca8157c50d --- /dev/null +++ b/.history/src/train_20250928105609.py @@ -0,0 +1,167 @@ +# src/train.py (Versi Final dengan Test-Time Augmentation) + +import torch +import torch.nn as nn +import torch.optim as optim +from tqdm import tqdm +import numpy as np +from sklearn.metrics import classification_report +from torch.utils.data import DataLoader, WeightedRandomSampler +from torch.optim.lr_scheduler import ReduceLROnPlateau + +# Impor dari file lain dalam proyek +from model import create_model +from dataset import get_dataloaders +from utils import save_model, save_plots, save_confusion_matrix + +# --- 1. 
# CONFIGURATION & HYPERPARAMETERS ---
# These are the final parameters you can use
DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'
DATA_DIR = 'data'
OUTPUT_DIR = '../outputs'
IMAGE_SIZE = 224  # Try raising to 384 if VRAM allows; remember to lower BATCH_SIZE
BATCH_SIZE = 8  # Batch size that is safe for 4 GB of VRAM
NUM_WORKERS = 4  # Optimal for a 6-core CPU
EPOCHS = 50
LEARNING_RATE_HEAD = 1e-3
LEARNING_RATE_FINETUNE = 3e-5
WEIGHT_DECAY = 0.05  # Slightly stronger regularization
MODEL_NAME = 'best_model_final_TTA.pth'  # New file name for the final result
NUM_CLASSES = 4


# --- 2. TRAINING & VALIDATION FUNCTIONS ---
def train_one_epoch(model, dataloader, optimizer, criterion, device):
    """Run one optimization pass over ``dataloader``.

    Args:
        model: Network to train; put into training mode for the pass.
        dataloader: Iterable of ``(images, labels)`` batches.
        optimizer: Optimizer zeroed and stepped once per batch.
        criterion: Loss callable invoked as ``criterion(outputs, labels)``.
        device: Device every batch is moved to before the forward pass.

    Returns:
        ``(epoch_loss, epoch_acc)``: the per-sample average loss and the
        fraction of samples whose arg-max prediction equals the label.

    Raises:
        ValueError: If ``dataloader`` yields no samples.
    """
    model.train()
    running_loss = 0.0
    correct_predictions = 0
    total_samples = 0

    for images, labels in tqdm(dataloader, total=len(dataloader), desc="Training"):
        images, labels = images.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # Scale by batch size so the epoch figure is a per-sample average
        # (assumes ``criterion`` returns a batch mean).
        running_loss += loss.item() * images.size(0)
        _, preds = torch.max(outputs, 1)
        # Accumulate a plain int so the accuracy below never depends on the
        # counter having been turned into a tensor.
        correct_predictions += (preds == labels).sum().item()
        total_samples += labels.size(0)

    if total_samples == 0:
        raise ValueError("dataloader yielded no samples; cannot compute epoch metrics")

    epoch_loss = running_loss / total_samples
    epoch_acc = correct_predictions / total_samples
    return epoch_loss, epoch_acc


def validate_one_epoch_tta(model, dataloader, criterion, device):
    """Evaluate ``model`` with flip-based test-time augmentation (TTA).

    Each batch is predicted twice, once as given and once flipped along its
    last axis, and the two softmax distributions are averaged before the
    arg-max and the loss are taken.

    Args:
        model: Network to evaluate; put into eval mode.
        dataloader: Iterable of ``(images, labels)`` batches.
        criterion: Loss callable that expects logits or log-probabilities
            (the script passes ``nn.CrossEntropyLoss``).
        device: Device every batch is moved to before the forward pass.

    Returns:
        ``(epoch_loss, epoch_acc, all_labels, all_preds)``: the per-sample
        average loss, the accuracy, and the flat lists of true and predicted
        class indices in iteration order.

    Raises:
        ValueError: If ``dataloader`` yields no samples.
    """
    model.eval()
    running_loss = 0.0
    correct_predictions = 0
    total_samples = 0
    all_preds, all_labels = [], []

    with torch.no_grad():
        for images, labels in tqdm(dataloader, total=len(dataloader), desc="Validating with TTA"):
            images, labels = images.to(device), labels.to(device)

            # TTA: predict the original batch and its flip along dim 3
            # (a horizontal flip, assuming N x C x H x W batches).
            outputs_original = model(images)
            outputs_flipped = model(torch.flip(images, dims=[3]))

            # Average the predicted probabilities of the two views.
            outputs_avg = (torch.softmax(outputs_original, dim=1) + torch.softmax(outputs_flipped, dim=1)) / 2

            # The criterion applies log_softmax itself, so feeding it raw
            # probabilities would apply softmax twice and flatten the loss
            # that the LR scheduler watches. Log-probabilities are invariant
            # under log_softmax (the probabilities sum to 1), so this gives
            # the true cross entropy of the averaged distribution. The clamp
            # keeps log() finite for probabilities that underflow to 0.
            tiny = torch.finfo(outputs_avg.dtype).tiny
            loss = criterion(torch.log(outputs_avg.clamp_min(tiny)), labels)

            running_loss += loss.item() * images.size(0)
            _, preds = torch.max(outputs_avg, 1)
            correct_predictions += (preds == labels).sum().item()
            total_samples += labels.size(0)

            all_preds.extend(preds.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())

    if total_samples == 0:
        raise ValueError("dataloader yielded no samples; cannot compute epoch metrics")

    epoch_loss = running_loss / total_samples
    epoch_acc = correct_predictions / total_samples

    class_ids = list(range(NUM_CLASSES))
    print("\n--- Laporan Klasifikasi Validasi (dengan TTA) ---")
    # Pass ``labels`` explicitly: without it sklearn infers the class set from
    # the data and raises when a class is missing from both the labels and the
    # predictions, because the count no longer matches ``target_names``.
    print(classification_report(
        all_labels,
        all_preds,
        labels=class_ids,
        target_names=[str(i) for i in class_ids],
        zero_division=0,
    ))

    return epoch_loss, epoch_acc, all_labels, all_preds


# --- 3. MAIN SCRIPT ---
if __name__ == '__main__':
    # The first returned loader is discarded; the training loader is rebuilt
    # further down around a weighted sampler.
    _, valid_loader, classes, train_dataset = get_dataloaders(DATA_DIR, BATCH_SIZE, IMAGE_SIZE, NUM_WORKERS)

    print("\n--- Menyiapkan Balanced Sampler untuk Training ---")
    class_counts = np.bincount(train_dataset.targets)
    # NOTE: the expression below is completed on the following source line.
    class_weights = 1.
/ torch.tensor(class_counts, dtype=torch.float) + sample_weights = class_weights[train_dataset.targets] + sampler = WeightedRandomSampler(weights=sample_weights, num_samples=len(sample_weights), replacement=True) + train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, sampler=sampler, num_workers=NUM_WORKERS) + + model = create_model(num_classes=NUM_CLASSES).to(DEVICE) + + # Menggunakan CrossEntropyLoss dengan Label Smoothing untuk regularisasi + criterion = nn.CrossEntropyLoss(label_smoothing=0.1).to(DEVICE) + print("Menggunakan CrossEntropyLoss dengan Label Smoothing (0.1).") + + # --- TAHAP 1: FREEZE BACKBONE, LATIH HEAD --- + print("\n--- TAHAP 1: Melatih Classifier Head ---") + for param in model.parameters(): + param.requires_grad = False + for param in model.head.parameters(): + param.requires_grad = True + optimizer_head = optim.AdamW(model.head.parameters(), lr=LEARNING_RATE_HEAD, weight_decay=WEIGHT_DECAY) + for epoch in range(5): + print(f"Epoch Head {epoch+1}/5") + train_one_epoch(model, train_loader, optimizer_head, criterion, DEVICE) + validate_one_epoch_tta(model, valid_loader, criterion, DEVICE) + + # --- TAHAP 2: UNFREEZE & FINE-TUNE SELURUH MODEL --- + print("\n--- TAHAP 2: Fine-tuning Seluruh Model ---") + for param in model.parameters(): + param.requires_grad = True + optimizer_finetune = optim.AdamW(model.parameters(), lr=LEARNING_RATE_FINETUNE, weight_decay=WEIGHT_DECAY) + scheduler = ReduceLROnPlateau(optimizer_finetune, mode='min', factor=0.2, patience=5) + print("Menggunakan scheduler ReduceLROnPlateau.") + + history = {'train_loss': [], 'train_acc': [], 'valid_loss': [], 'valid_acc': []} + best_valid_acc = 0.0 + best_labels, best_preds = None, None + + for epoch in range(EPOCHS): + print(f"Epoch {epoch+1}/{EPOCHS}") + + train_loss, train_acc = train_one_epoch(model, train_loader, optimizer_finetune, criterion, DEVICE) + valid_loss, valid_acc, valid_labels, valid_preds = validate_one_epoch_tta(model, valid_loader, criterion, 
DEVICE) + + scheduler.step(valid_loss) + + print(f" Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}") + print(f" Valid Loss: {valid_loss:.4f}, Valid Acc: {valid_acc:.4f}") + + history['train_loss'].append(train_loss) + history['train_acc'].append(train_acc) + history['valid_loss'].append(valid_loss) + history['valid_acc'].append(valid_acc) + + if valid_acc > best_valid_acc: + print(f"Validasi akurasi meningkat dari {best_valid_acc:.4f} ke {valid_acc:.4f}. Menyimpan model...") + save_model(epoch, model, optimizer_finetune, criterion, f"{OUTPUT_DIR}/{MODEL_NAME}") + best_valid_acc = valid_acc + best_labels = valid_labels + best_preds = valid_preds + + save_plots(history['train_acc'], history['valid_acc'], history['train_loss'], history['valid_loss'], OUTPUT_DIR) + + if best_labels and best_preds: + save_confusion_matrix(best_labels, best_preds, classes, f"{OUTPUT_DIR}/confusion_matrix.png") + + print("\n--- Selesai ---") + print(f"Model terbaik disimpan di {OUTPUT_DIR}/{MODEL_NAME}") \ No newline at end of file diff --git a/.history/src/train_20250928105612.py b/.history/src/train_20250928105612.py new file mode 100644 index 0000000000000000000000000000000000000000..3e271b4f81320f89106147aa4c5eb0d7d658a19a --- /dev/null +++ b/.history/src/train_20250928105612.py @@ -0,0 +1,167 @@ +# src/train.py (Versi Final dengan Test-Time Augmentation) + +import torch +import torch.nn as nn +import torch.optim as optim +from tqdm import tqdm +import numpy as np +from sklearn.metrics import classification_report +from torch.utils.data import DataLoader, WeightedRandomSampler +from torch.optim.lr_scheduler import ReduceLROnPlateau + +# Impor dari file lain dalam proyek +from model import create_model +from dataset import get_dataloaders +from utils import save_model, save_plots, save_confusion_matrix + +# --- 1. 
KONFIGURASI & HYPERPARAMETERS --- +# Ini adalah parameter final yang bisa Anda gunakan +DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu' +DATA_DIR = 'data' +OUTPUT_DIR = 'outputs' +IMAGE_SIZE = 224 # Coba naikkan ke 384 jika VRAM cukup, jangan lupa turunkan BATCH_SIZE +BATCH_SIZE = 8 # Ukuran batch yang aman untuk VRAM 4GB +NUM_WORKERS = 4 # Optimal untuk CPU 6-core +EPOCHS = 50 +LEARNING_RATE_HEAD = 1e-3 +LEARNING_RATE_FINETUNE = 3e-5 +WEIGHT_DECAY = 0.05 # Regularisasi yang sedikit lebih kuat +MODEL_NAME = 'best_model_final_TTA.pth' # Nama file baru untuk hasil final +NUM_CLASSES = 4 + +# --- 2. FUNGSI TRAINING & VALIDASI --- +def train_one_epoch(model, dataloader, optimizer, criterion, device): + model.train() + running_loss = 0.0 + correct_predictions = 0 + total_samples = 0 + + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Training"): + images, labels = images.to(device), labels.to(device) + optimizer.zero_grad() + outputs = model(images) + loss = criterion(outputs, labels) + loss.backward() + optimizer.step() + + running_loss += loss.item() * images.size(0) + _, preds = torch.max(outputs, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + return epoch_loss, epoch_acc + +def validate_one_epoch_tta(model, dataloader, criterion, device): + """ + Versi validasi yang menggunakan Test-Time Augmentation (TTA). + Memprediksi gambar asli dan versi flip, lalu merata-ratakan hasilnya. 
+ """ + model.eval() + running_loss = 0.0 + correct_predictions = 0 + total_samples = 0 + all_preds, all_labels = [], [] + + with torch.no_grad(): + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Validating with TTA"): + images, labels = images.to(device), labels.to(device) + + # TTA: Prediksi gambar asli dan versi flip horizontal + outputs_original = model(images) + outputs_flipped = model(torch.flip(images, dims=[3])) # flip horizontal + + # Rata-ratakan probabilitas hasil prediksi + outputs_avg = (torch.softmax(outputs_original, dim=1) + torch.softmax(outputs_flipped, dim=1)) / 2 + + # Hitung loss dari hasil rata-rata + loss = criterion(outputs_avg, labels) + + running_loss += loss.item() * images.size(0) + _, preds = torch.max(outputs_avg, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + all_preds.extend(preds.cpu().numpy()) + all_labels.extend(labels.cpu().numpy()) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + + print("\n--- Laporan Klasifikasi Validasi (dengan TTA) ---") + print(classification_report(all_labels, all_preds, target_names=[str(i) for i in range(NUM_CLASSES)], zero_division=0)) + + return epoch_loss, epoch_acc, all_labels, all_preds + +# --- 3. SCRIPT UTAMA --- +if __name__ == '__main__': + _, valid_loader, classes, train_dataset = get_dataloaders(DATA_DIR, BATCH_SIZE, IMAGE_SIZE, NUM_WORKERS) + + print("\n--- Menyiapkan Balanced Sampler untuk Training ---") + class_counts = np.bincount(train_dataset.targets) + class_weights = 1. 
/ torch.tensor(class_counts, dtype=torch.float) + sample_weights = class_weights[train_dataset.targets] + sampler = WeightedRandomSampler(weights=sample_weights, num_samples=len(sample_weights), replacement=True) + train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, sampler=sampler, num_workers=NUM_WORKERS) + + model = create_model(num_classes=NUM_CLASSES).to(DEVICE) + + # Menggunakan CrossEntropyLoss dengan Label Smoothing untuk regularisasi + criterion = nn.CrossEntropyLoss(label_smoothing=0.1).to(DEVICE) + print("Menggunakan CrossEntropyLoss dengan Label Smoothing (0.1).") + + # --- TAHAP 1: FREEZE BACKBONE, LATIH HEAD --- + print("\n--- TAHAP 1: Melatih Classifier Head ---") + for param in model.parameters(): + param.requires_grad = False + for param in model.head.parameters(): + param.requires_grad = True + optimizer_head = optim.AdamW(model.head.parameters(), lr=LEARNING_RATE_HEAD, weight_decay=WEIGHT_DECAY) + for epoch in range(5): + print(f"Epoch Head {epoch+1}/5") + train_one_epoch(model, train_loader, optimizer_head, criterion, DEVICE) + validate_one_epoch_tta(model, valid_loader, criterion, DEVICE) + + # --- TAHAP 2: UNFREEZE & FINE-TUNE SELURUH MODEL --- + print("\n--- TAHAP 2: Fine-tuning Seluruh Model ---") + for param in model.parameters(): + param.requires_grad = True + optimizer_finetune = optim.AdamW(model.parameters(), lr=LEARNING_RATE_FINETUNE, weight_decay=WEIGHT_DECAY) + scheduler = ReduceLROnPlateau(optimizer_finetune, mode='min', factor=0.2, patience=5) + print("Menggunakan scheduler ReduceLROnPlateau.") + + history = {'train_loss': [], 'train_acc': [], 'valid_loss': [], 'valid_acc': []} + best_valid_acc = 0.0 + best_labels, best_preds = None, None + + for epoch in range(EPOCHS): + print(f"Epoch {epoch+1}/{EPOCHS}") + + train_loss, train_acc = train_one_epoch(model, train_loader, optimizer_finetune, criterion, DEVICE) + valid_loss, valid_acc, valid_labels, valid_preds = validate_one_epoch_tta(model, valid_loader, criterion, 
DEVICE) + + scheduler.step(valid_loss) + + print(f" Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}") + print(f" Valid Loss: {valid_loss:.4f}, Valid Acc: {valid_acc:.4f}") + + history['train_loss'].append(train_loss) + history['train_acc'].append(train_acc) + history['valid_loss'].append(valid_loss) + history['valid_acc'].append(valid_acc) + + if valid_acc > best_valid_acc: + print(f"Validasi akurasi meningkat dari {best_valid_acc:.4f} ke {valid_acc:.4f}. Menyimpan model...") + save_model(epoch, model, optimizer_finetune, criterion, f"{OUTPUT_DIR}/{MODEL_NAME}") + best_valid_acc = valid_acc + best_labels = valid_labels + best_preds = valid_preds + + save_plots(history['train_acc'], history['valid_acc'], history['train_loss'], history['valid_loss'], OUTPUT_DIR) + + if best_labels and best_preds: + save_confusion_matrix(best_labels, best_preds, classes, f"{OUTPUT_DIR}/confusion_matrix.png") + + print("\n--- Selesai ---") + print(f"Model terbaik disimpan di {OUTPUT_DIR}/{MODEL_NAME}") \ No newline at end of file diff --git a/.history/src/train_20250928105615.py b/.history/src/train_20250928105615.py new file mode 100644 index 0000000000000000000000000000000000000000..b8dc31d7927ec4b47519e9f3f0141f35cb70d569 --- /dev/null +++ b/.history/src/train_20250928105615.py @@ -0,0 +1,167 @@ +# src/train.py (Versi Final dengan Test-Time Augmentation) + +import torch +import torch.nn as nn +import torch.optim as optim +from tqdm import tqdm +import numpy as np +from sklearn.metrics import classification_report +from torch.utils.data import DataLoader, WeightedRandomSampler +from torch.optim.lr_scheduler import ReduceLROnPlateau + +# Impor dari file lain dalam proyek +from model import create_model +from dataset import get_dataloaders +from utils import save_model, save_plots, save_confusion_matrix + +# --- 1. 
KONFIGURASI & HYPERPARAMETERS --- +# Ini adalah parameter final yang bisa Anda gunakan +DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu' +DATA_DIR = 'data' +OUTPUT_DIR = 'outputs.ne' +IMAGE_SIZE = 224 # Coba naikkan ke 384 jika VRAM cukup, jangan lupa turunkan BATCH_SIZE +BATCH_SIZE = 8 # Ukuran batch yang aman untuk VRAM 4GB +NUM_WORKERS = 4 # Optimal untuk CPU 6-core +EPOCHS = 50 +LEARNING_RATE_HEAD = 1e-3 +LEARNING_RATE_FINETUNE = 3e-5 +WEIGHT_DECAY = 0.05 # Regularisasi yang sedikit lebih kuat +MODEL_NAME = 'best_model_final_TTA.pth' # Nama file baru untuk hasil final +NUM_CLASSES = 4 + +# --- 2. FUNGSI TRAINING & VALIDASI --- +def train_one_epoch(model, dataloader, optimizer, criterion, device): + model.train() + running_loss = 0.0 + correct_predictions = 0 + total_samples = 0 + + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Training"): + images, labels = images.to(device), labels.to(device) + optimizer.zero_grad() + outputs = model(images) + loss = criterion(outputs, labels) + loss.backward() + optimizer.step() + + running_loss += loss.item() * images.size(0) + _, preds = torch.max(outputs, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + return epoch_loss, epoch_acc + +def validate_one_epoch_tta(model, dataloader, criterion, device): + """ + Versi validasi yang menggunakan Test-Time Augmentation (TTA). + Memprediksi gambar asli dan versi flip, lalu merata-ratakan hasilnya. 
+ """ + model.eval() + running_loss = 0.0 + correct_predictions = 0 + total_samples = 0 + all_preds, all_labels = [], [] + + with torch.no_grad(): + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Validating with TTA"): + images, labels = images.to(device), labels.to(device) + + # TTA: Prediksi gambar asli dan versi flip horizontal + outputs_original = model(images) + outputs_flipped = model(torch.flip(images, dims=[3])) # flip horizontal + + # Rata-ratakan probabilitas hasil prediksi + outputs_avg = (torch.softmax(outputs_original, dim=1) + torch.softmax(outputs_flipped, dim=1)) / 2 + + # Hitung loss dari hasil rata-rata + loss = criterion(outputs_avg, labels) + + running_loss += loss.item() * images.size(0) + _, preds = torch.max(outputs_avg, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + all_preds.extend(preds.cpu().numpy()) + all_labels.extend(labels.cpu().numpy()) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + + print("\n--- Laporan Klasifikasi Validasi (dengan TTA) ---") + print(classification_report(all_labels, all_preds, target_names=[str(i) for i in range(NUM_CLASSES)], zero_division=0)) + + return epoch_loss, epoch_acc, all_labels, all_preds + +# --- 3. SCRIPT UTAMA --- +if __name__ == '__main__': + _, valid_loader, classes, train_dataset = get_dataloaders(DATA_DIR, BATCH_SIZE, IMAGE_SIZE, NUM_WORKERS) + + print("\n--- Menyiapkan Balanced Sampler untuk Training ---") + class_counts = np.bincount(train_dataset.targets) + class_weights = 1. 
/ torch.tensor(class_counts, dtype=torch.float) + sample_weights = class_weights[train_dataset.targets] + sampler = WeightedRandomSampler(weights=sample_weights, num_samples=len(sample_weights), replacement=True) + train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, sampler=sampler, num_workers=NUM_WORKERS) + + model = create_model(num_classes=NUM_CLASSES).to(DEVICE) + + # Menggunakan CrossEntropyLoss dengan Label Smoothing untuk regularisasi + criterion = nn.CrossEntropyLoss(label_smoothing=0.1).to(DEVICE) + print("Menggunakan CrossEntropyLoss dengan Label Smoothing (0.1).") + + # --- TAHAP 1: FREEZE BACKBONE, LATIH HEAD --- + print("\n--- TAHAP 1: Melatih Classifier Head ---") + for param in model.parameters(): + param.requires_grad = False + for param in model.head.parameters(): + param.requires_grad = True + optimizer_head = optim.AdamW(model.head.parameters(), lr=LEARNING_RATE_HEAD, weight_decay=WEIGHT_DECAY) + for epoch in range(5): + print(f"Epoch Head {epoch+1}/5") + train_one_epoch(model, train_loader, optimizer_head, criterion, DEVICE) + validate_one_epoch_tta(model, valid_loader, criterion, DEVICE) + + # --- TAHAP 2: UNFREEZE & FINE-TUNE SELURUH MODEL --- + print("\n--- TAHAP 2: Fine-tuning Seluruh Model ---") + for param in model.parameters(): + param.requires_grad = True + optimizer_finetune = optim.AdamW(model.parameters(), lr=LEARNING_RATE_FINETUNE, weight_decay=WEIGHT_DECAY) + scheduler = ReduceLROnPlateau(optimizer_finetune, mode='min', factor=0.2, patience=5) + print("Menggunakan scheduler ReduceLROnPlateau.") + + history = {'train_loss': [], 'train_acc': [], 'valid_loss': [], 'valid_acc': []} + best_valid_acc = 0.0 + best_labels, best_preds = None, None + + for epoch in range(EPOCHS): + print(f"Epoch {epoch+1}/{EPOCHS}") + + train_loss, train_acc = train_one_epoch(model, train_loader, optimizer_finetune, criterion, DEVICE) + valid_loss, valid_acc, valid_labels, valid_preds = validate_one_epoch_tta(model, valid_loader, criterion, 
DEVICE) + + scheduler.step(valid_loss) + + print(f" Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}") + print(f" Valid Loss: {valid_loss:.4f}, Valid Acc: {valid_acc:.4f}") + + history['train_loss'].append(train_loss) + history['train_acc'].append(train_acc) + history['valid_loss'].append(valid_loss) + history['valid_acc'].append(valid_acc) + + if valid_acc > best_valid_acc: + print(f"Validasi akurasi meningkat dari {best_valid_acc:.4f} ke {valid_acc:.4f}. Menyimpan model...") + save_model(epoch, model, optimizer_finetune, criterion, f"{OUTPUT_DIR}/{MODEL_NAME}") + best_valid_acc = valid_acc + best_labels = valid_labels + best_preds = valid_preds + + save_plots(history['train_acc'], history['valid_acc'], history['train_loss'], history['valid_loss'], OUTPUT_DIR) + + if best_labels and best_preds: + save_confusion_matrix(best_labels, best_preds, classes, f"{OUTPUT_DIR}/confusion_matrix.png") + + print("\n--- Selesai ---") + print(f"Model terbaik disimpan di {OUTPUT_DIR}/{MODEL_NAME}") \ No newline at end of file diff --git a/.history/src/train_20250928105619.py b/.history/src/train_20250928105619.py new file mode 100644 index 0000000000000000000000000000000000000000..240a7b45b9b211bb92e0745ede99d31b9fdcd6b6 --- /dev/null +++ b/.history/src/train_20250928105619.py @@ -0,0 +1,167 @@ +# src/train.py (Versi Final dengan Test-Time Augmentation) + +import torch +import torch.nn as nn +import torch.optim as optim +from tqdm import tqdm +import numpy as np +from sklearn.metrics import classification_report +from torch.utils.data import DataLoader, WeightedRandomSampler +from torch.optim.lr_scheduler import ReduceLROnPlateau + +# Impor dari file lain dalam proyek +from model import create_model +from dataset import get_dataloaders +from utils import save_model, save_plots, save_confusion_matrix + +# --- 1. 
KONFIGURASI & HYPERPARAMETERS --- +# Ini adalah parameter final yang bisa Anda gunakan +DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu' +DATA_DIR = 'data' +OUTPUT_DIR = 'outputs/new' +IMAGE_SIZE = 224 # Coba naikkan ke 384 jika VRAM cukup, jangan lupa turunkan BATCH_SIZE +BATCH_SIZE = 8 # Ukuran batch yang aman untuk VRAM 4GB +NUM_WORKERS = 4 # Optimal untuk CPU 6-core +EPOCHS = 50 +LEARNING_RATE_HEAD = 1e-3 +LEARNING_RATE_FINETUNE = 3e-5 +WEIGHT_DECAY = 0.05 # Regularisasi yang sedikit lebih kuat +MODEL_NAME = 'best_model_final_TTA.pth' # Nama file baru untuk hasil final +NUM_CLASSES = 4 + +# --- 2. FUNGSI TRAINING & VALIDASI --- +def train_one_epoch(model, dataloader, optimizer, criterion, device): + model.train() + running_loss = 0.0 + correct_predictions = 0 + total_samples = 0 + + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Training"): + images, labels = images.to(device), labels.to(device) + optimizer.zero_grad() + outputs = model(images) + loss = criterion(outputs, labels) + loss.backward() + optimizer.step() + + running_loss += loss.item() * images.size(0) + _, preds = torch.max(outputs, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + return epoch_loss, epoch_acc + +def validate_one_epoch_tta(model, dataloader, criterion, device): + """ + Versi validasi yang menggunakan Test-Time Augmentation (TTA). + Memprediksi gambar asli dan versi flip, lalu merata-ratakan hasilnya. 
+ """ + model.eval() + running_loss = 0.0 + correct_predictions = 0 + total_samples = 0 + all_preds, all_labels = [], [] + + with torch.no_grad(): + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Validating with TTA"): + images, labels = images.to(device), labels.to(device) + + # TTA: Prediksi gambar asli dan versi flip horizontal + outputs_original = model(images) + outputs_flipped = model(torch.flip(images, dims=[3])) # flip horizontal + + # Rata-ratakan probabilitas hasil prediksi + outputs_avg = (torch.softmax(outputs_original, dim=1) + torch.softmax(outputs_flipped, dim=1)) / 2 + + # Hitung loss dari hasil rata-rata + loss = criterion(outputs_avg, labels) + + running_loss += loss.item() * images.size(0) + _, preds = torch.max(outputs_avg, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + all_preds.extend(preds.cpu().numpy()) + all_labels.extend(labels.cpu().numpy()) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + + print("\n--- Laporan Klasifikasi Validasi (dengan TTA) ---") + print(classification_report(all_labels, all_preds, target_names=[str(i) for i in range(NUM_CLASSES)], zero_division=0)) + + return epoch_loss, epoch_acc, all_labels, all_preds + +# --- 3. SCRIPT UTAMA --- +if __name__ == '__main__': + _, valid_loader, classes, train_dataset = get_dataloaders(DATA_DIR, BATCH_SIZE, IMAGE_SIZE, NUM_WORKERS) + + print("\n--- Menyiapkan Balanced Sampler untuk Training ---") + class_counts = np.bincount(train_dataset.targets) + class_weights = 1. 
/ torch.tensor(class_counts, dtype=torch.float) + sample_weights = class_weights[train_dataset.targets] + sampler = WeightedRandomSampler(weights=sample_weights, num_samples=len(sample_weights), replacement=True) + train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, sampler=sampler, num_workers=NUM_WORKERS) + + model = create_model(num_classes=NUM_CLASSES).to(DEVICE) + + # Menggunakan CrossEntropyLoss dengan Label Smoothing untuk regularisasi + criterion = nn.CrossEntropyLoss(label_smoothing=0.1).to(DEVICE) + print("Menggunakan CrossEntropyLoss dengan Label Smoothing (0.1).") + + # --- TAHAP 1: FREEZE BACKBONE, LATIH HEAD --- + print("\n--- TAHAP 1: Melatih Classifier Head ---") + for param in model.parameters(): + param.requires_grad = False + for param in model.head.parameters(): + param.requires_grad = True + optimizer_head = optim.AdamW(model.head.parameters(), lr=LEARNING_RATE_HEAD, weight_decay=WEIGHT_DECAY) + for epoch in range(5): + print(f"Epoch Head {epoch+1}/5") + train_one_epoch(model, train_loader, optimizer_head, criterion, DEVICE) + validate_one_epoch_tta(model, valid_loader, criterion, DEVICE) + + # --- TAHAP 2: UNFREEZE & FINE-TUNE SELURUH MODEL --- + print("\n--- TAHAP 2: Fine-tuning Seluruh Model ---") + for param in model.parameters(): + param.requires_grad = True + optimizer_finetune = optim.AdamW(model.parameters(), lr=LEARNING_RATE_FINETUNE, weight_decay=WEIGHT_DECAY) + scheduler = ReduceLROnPlateau(optimizer_finetune, mode='min', factor=0.2, patience=5) + print("Menggunakan scheduler ReduceLROnPlateau.") + + history = {'train_loss': [], 'train_acc': [], 'valid_loss': [], 'valid_acc': []} + best_valid_acc = 0.0 + best_labels, best_preds = None, None + + for epoch in range(EPOCHS): + print(f"Epoch {epoch+1}/{EPOCHS}") + + train_loss, train_acc = train_one_epoch(model, train_loader, optimizer_finetune, criterion, DEVICE) + valid_loss, valid_acc, valid_labels, valid_preds = validate_one_epoch_tta(model, valid_loader, criterion, 
DEVICE) + + scheduler.step(valid_loss) + + print(f" Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}") + print(f" Valid Loss: {valid_loss:.4f}, Valid Acc: {valid_acc:.4f}") + + history['train_loss'].append(train_loss) + history['train_acc'].append(train_acc) + history['valid_loss'].append(valid_loss) + history['valid_acc'].append(valid_acc) + + if valid_acc > best_valid_acc: + print(f"Validasi akurasi meningkat dari {best_valid_acc:.4f} ke {valid_acc:.4f}. Menyimpan model...") + save_model(epoch, model, optimizer_finetune, criterion, f"{OUTPUT_DIR}/{MODEL_NAME}") + best_valid_acc = valid_acc + best_labels = valid_labels + best_preds = valid_preds + + save_plots(history['train_acc'], history['valid_acc'], history['train_loss'], history['valid_loss'], OUTPUT_DIR) + + if best_labels and best_preds: + save_confusion_matrix(best_labels, best_preds, classes, f"{OUTPUT_DIR}/confusion_matrix.png") + + print("\n--- Selesai ---") + print(f"Model terbaik disimpan di {OUTPUT_DIR}/{MODEL_NAME}") \ No newline at end of file diff --git a/.history/src/train_20250928105621.py b/.history/src/train_20250928105621.py new file mode 100644 index 0000000000000000000000000000000000000000..a8b2c61fb13bc2debf9a8d8a004084540b516ca9 --- /dev/null +++ b/.history/src/train_20250928105621.py @@ -0,0 +1,167 @@ +# src/train.py (Versi Final dengan Test-Time Augmentation) + +import torch +import torch.nn as nn +import torch.optim as optim +from tqdm import tqdm +import numpy as np +from sklearn.metrics import classification_report +from torch.utils.data import DataLoader, WeightedRandomSampler +from torch.optim.lr_scheduler import ReduceLROnPlateau + +# Impor dari file lain dalam proyek +from model import create_model +from dataset import get_dataloaders +from utils import save_model, save_plots, save_confusion_matrix + +# --- 1. 
KONFIGURASI & HYPERPARAMETERS --- +# Ini adalah parameter final yang bisa Anda gunakan +DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu' +DATA_DIR = 'data' +OUTPUT_DIR = 'outputs/new_model' +IMAGE_SIZE = 224 # Coba naikkan ke 384 jika VRAM cukup, jangan lupa turunkan BATCH_SIZE +BATCH_SIZE = 8 # Ukuran batch yang aman untuk VRAM 4GB +NUM_WORKERS = 4 # Optimal untuk CPU 6-core +EPOCHS = 50 +LEARNING_RATE_HEAD = 1e-3 +LEARNING_RATE_FINETUNE = 3e-5 +WEIGHT_DECAY = 0.05 # Regularisasi yang sedikit lebih kuat +MODEL_NAME = 'best_model_final_TTA.pth' # Nama file baru untuk hasil final +NUM_CLASSES = 4 + +# --- 2. FUNGSI TRAINING & VALIDASI --- +def train_one_epoch(model, dataloader, optimizer, criterion, device): + model.train() + running_loss = 0.0 + correct_predictions = 0 + total_samples = 0 + + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Training"): + images, labels = images.to(device), labels.to(device) + optimizer.zero_grad() + outputs = model(images) + loss = criterion(outputs, labels) + loss.backward() + optimizer.step() + + running_loss += loss.item() * images.size(0) + _, preds = torch.max(outputs, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + return epoch_loss, epoch_acc + +def validate_one_epoch_tta(model, dataloader, criterion, device): + """ + Versi validasi yang menggunakan Test-Time Augmentation (TTA). + Memprediksi gambar asli dan versi flip, lalu merata-ratakan hasilnya. 
+ """ + model.eval() + running_loss = 0.0 + correct_predictions = 0 + total_samples = 0 + all_preds, all_labels = [], [] + + with torch.no_grad(): + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Validating with TTA"): + images, labels = images.to(device), labels.to(device) + + # TTA: Prediksi gambar asli dan versi flip horizontal + outputs_original = model(images) + outputs_flipped = model(torch.flip(images, dims=[3])) # flip horizontal + + # Rata-ratakan probabilitas hasil prediksi + outputs_avg = (torch.softmax(outputs_original, dim=1) + torch.softmax(outputs_flipped, dim=1)) / 2 + + # Hitung loss dari hasil rata-rata + loss = criterion(outputs_avg, labels) + + running_loss += loss.item() * images.size(0) + _, preds = torch.max(outputs_avg, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + all_preds.extend(preds.cpu().numpy()) + all_labels.extend(labels.cpu().numpy()) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + + print("\n--- Laporan Klasifikasi Validasi (dengan TTA) ---") + print(classification_report(all_labels, all_preds, target_names=[str(i) for i in range(NUM_CLASSES)], zero_division=0)) + + return epoch_loss, epoch_acc, all_labels, all_preds + +# --- 3. SCRIPT UTAMA --- +if __name__ == '__main__': + _, valid_loader, classes, train_dataset = get_dataloaders(DATA_DIR, BATCH_SIZE, IMAGE_SIZE, NUM_WORKERS) + + print("\n--- Menyiapkan Balanced Sampler untuk Training ---") + class_counts = np.bincount(train_dataset.targets) + class_weights = 1. 
/ torch.tensor(class_counts, dtype=torch.float) + sample_weights = class_weights[train_dataset.targets] + sampler = WeightedRandomSampler(weights=sample_weights, num_samples=len(sample_weights), replacement=True) + train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, sampler=sampler, num_workers=NUM_WORKERS) + + model = create_model(num_classes=NUM_CLASSES).to(DEVICE) + + # Menggunakan CrossEntropyLoss dengan Label Smoothing untuk regularisasi + criterion = nn.CrossEntropyLoss(label_smoothing=0.1).to(DEVICE) + print("Menggunakan CrossEntropyLoss dengan Label Smoothing (0.1).") + + # --- TAHAP 1: FREEZE BACKBONE, LATIH HEAD --- + print("\n--- TAHAP 1: Melatih Classifier Head ---") + for param in model.parameters(): + param.requires_grad = False + for param in model.head.parameters(): + param.requires_grad = True + optimizer_head = optim.AdamW(model.head.parameters(), lr=LEARNING_RATE_HEAD, weight_decay=WEIGHT_DECAY) + for epoch in range(5): + print(f"Epoch Head {epoch+1}/5") + train_one_epoch(model, train_loader, optimizer_head, criterion, DEVICE) + validate_one_epoch_tta(model, valid_loader, criterion, DEVICE) + + # --- TAHAP 2: UNFREEZE & FINE-TUNE SELURUH MODEL --- + print("\n--- TAHAP 2: Fine-tuning Seluruh Model ---") + for param in model.parameters(): + param.requires_grad = True + optimizer_finetune = optim.AdamW(model.parameters(), lr=LEARNING_RATE_FINETUNE, weight_decay=WEIGHT_DECAY) + scheduler = ReduceLROnPlateau(optimizer_finetune, mode='min', factor=0.2, patience=5) + print("Menggunakan scheduler ReduceLROnPlateau.") + + history = {'train_loss': [], 'train_acc': [], 'valid_loss': [], 'valid_acc': []} + best_valid_acc = 0.0 + best_labels, best_preds = None, None + + for epoch in range(EPOCHS): + print(f"Epoch {epoch+1}/{EPOCHS}") + + train_loss, train_acc = train_one_epoch(model, train_loader, optimizer_finetune, criterion, DEVICE) + valid_loss, valid_acc, valid_labels, valid_preds = validate_one_epoch_tta(model, valid_loader, criterion, 
DEVICE) + + scheduler.step(valid_loss) + + print(f" Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}") + print(f" Valid Loss: {valid_loss:.4f}, Valid Acc: {valid_acc:.4f}") + + history['train_loss'].append(train_loss) + history['train_acc'].append(train_acc) + history['valid_loss'].append(valid_loss) + history['valid_acc'].append(valid_acc) + + if valid_acc > best_valid_acc: + print(f"Validasi akurasi meningkat dari {best_valid_acc:.4f} ke {valid_acc:.4f}. Menyimpan model...") + save_model(epoch, model, optimizer_finetune, criterion, f"{OUTPUT_DIR}/{MODEL_NAME}") + best_valid_acc = valid_acc + best_labels = valid_labels + best_preds = valid_preds + + save_plots(history['train_acc'], history['valid_acc'], history['train_loss'], history['valid_loss'], OUTPUT_DIR) + + if best_labels and best_preds: + save_confusion_matrix(best_labels, best_preds, classes, f"{OUTPUT_DIR}/confusion_matrix.png") + + print("\n--- Selesai ---") + print(f"Model terbaik disimpan di {OUTPUT_DIR}/{MODEL_NAME}") \ No newline at end of file diff --git a/.history/src/train_20250928105632.py b/.history/src/train_20250928105632.py new file mode 100644 index 0000000000000000000000000000000000000000..f9a6e799d96f59271e9dd241219ced2065a37b09 --- /dev/null +++ b/.history/src/train_20250928105632.py @@ -0,0 +1,167 @@ +# src/train.py (Versi Final dengan Test-Time Augmentation) + +import torch +import torch.nn as nn +import torch.optim as optim +from tqdm import tqdm +import numpy as np +from sklearn.metrics import classification_report +from torch.utils.data import DataLoader, WeightedRandomSampler +from torch.optim.lr_scheduler import ReduceLROnPlateau + +# Impor dari file lain dalam proyek +from model import create_model +from dataset import get_dataloaders +from utils import save_model, save_plots, save_confusion_matrix + +# --- 1. 
KONFIGURASI & HYPERPARAMETERS --- +# Ini adalah parameter final yang bisa Anda gunakan +DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu' +DATA_DIR = 'data' +OUTPUT_DIR = 'outputs/new_model' +IMAGE_SIZE = 4 # Coba naikkan ke 384 jika VRAM cukup, jangan lupa turunkan BATCH_SIZE +BATCH_SIZE = 8 # Ukuran batch yang aman untuk VRAM 4GB +NUM_WORKERS = 4 # Optimal untuk CPU 6-core +EPOCHS = 50 +LEARNING_RATE_HEAD = 1e-3 +LEARNING_RATE_FINETUNE = 3e-5 +WEIGHT_DECAY = 0.05 # Regularisasi yang sedikit lebih kuat +MODEL_NAME = 'best_model_final_TTA.pth' # Nama file baru untuk hasil final +NUM_CLASSES = 4 + +# --- 2. FUNGSI TRAINING & VALIDASI --- +def train_one_epoch(model, dataloader, optimizer, criterion, device): + model.train() + running_loss = 0.0 + correct_predictions = 0 + total_samples = 0 + + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Training"): + images, labels = images.to(device), labels.to(device) + optimizer.zero_grad() + outputs = model(images) + loss = criterion(outputs, labels) + loss.backward() + optimizer.step() + + running_loss += loss.item() * images.size(0) + _, preds = torch.max(outputs, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + return epoch_loss, epoch_acc + +def validate_one_epoch_tta(model, dataloader, criterion, device): + """ + Versi validasi yang menggunakan Test-Time Augmentation (TTA). + Memprediksi gambar asli dan versi flip, lalu merata-ratakan hasilnya. 
+ """ + model.eval() + running_loss = 0.0 + correct_predictions = 0 + total_samples = 0 + all_preds, all_labels = [], [] + + with torch.no_grad(): + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Validating with TTA"): + images, labels = images.to(device), labels.to(device) + + # TTA: Prediksi gambar asli dan versi flip horizontal + outputs_original = model(images) + outputs_flipped = model(torch.flip(images, dims=[3])) # flip horizontal + + # Rata-ratakan probabilitas hasil prediksi + outputs_avg = (torch.softmax(outputs_original, dim=1) + torch.softmax(outputs_flipped, dim=1)) / 2 + + # Hitung loss dari hasil rata-rata + loss = criterion(outputs_avg, labels) + + running_loss += loss.item() * images.size(0) + _, preds = torch.max(outputs_avg, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + all_preds.extend(preds.cpu().numpy()) + all_labels.extend(labels.cpu().numpy()) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + + print("\n--- Laporan Klasifikasi Validasi (dengan TTA) ---") + print(classification_report(all_labels, all_preds, target_names=[str(i) for i in range(NUM_CLASSES)], zero_division=0)) + + return epoch_loss, epoch_acc, all_labels, all_preds + +# --- 3. SCRIPT UTAMA --- +if __name__ == '__main__': + _, valid_loader, classes, train_dataset = get_dataloaders(DATA_DIR, BATCH_SIZE, IMAGE_SIZE, NUM_WORKERS) + + print("\n--- Menyiapkan Balanced Sampler untuk Training ---") + class_counts = np.bincount(train_dataset.targets) + class_weights = 1. 
/ torch.tensor(class_counts, dtype=torch.float) + sample_weights = class_weights[train_dataset.targets] + sampler = WeightedRandomSampler(weights=sample_weights, num_samples=len(sample_weights), replacement=True) + train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, sampler=sampler, num_workers=NUM_WORKERS) + + model = create_model(num_classes=NUM_CLASSES).to(DEVICE) + + # Menggunakan CrossEntropyLoss dengan Label Smoothing untuk regularisasi + criterion = nn.CrossEntropyLoss(label_smoothing=0.1).to(DEVICE) + print("Menggunakan CrossEntropyLoss dengan Label Smoothing (0.1).") + + # --- TAHAP 1: FREEZE BACKBONE, LATIH HEAD --- + print("\n--- TAHAP 1: Melatih Classifier Head ---") + for param in model.parameters(): + param.requires_grad = False + for param in model.head.parameters(): + param.requires_grad = True + optimizer_head = optim.AdamW(model.head.parameters(), lr=LEARNING_RATE_HEAD, weight_decay=WEIGHT_DECAY) + for epoch in range(5): + print(f"Epoch Head {epoch+1}/5") + train_one_epoch(model, train_loader, optimizer_head, criterion, DEVICE) + validate_one_epoch_tta(model, valid_loader, criterion, DEVICE) + + # --- TAHAP 2: UNFREEZE & FINE-TUNE SELURUH MODEL --- + print("\n--- TAHAP 2: Fine-tuning Seluruh Model ---") + for param in model.parameters(): + param.requires_grad = True + optimizer_finetune = optim.AdamW(model.parameters(), lr=LEARNING_RATE_FINETUNE, weight_decay=WEIGHT_DECAY) + scheduler = ReduceLROnPlateau(optimizer_finetune, mode='min', factor=0.2, patience=5) + print("Menggunakan scheduler ReduceLROnPlateau.") + + history = {'train_loss': [], 'train_acc': [], 'valid_loss': [], 'valid_acc': []} + best_valid_acc = 0.0 + best_labels, best_preds = None, None + + for epoch in range(EPOCHS): + print(f"Epoch {epoch+1}/{EPOCHS}") + + train_loss, train_acc = train_one_epoch(model, train_loader, optimizer_finetune, criterion, DEVICE) + valid_loss, valid_acc, valid_labels, valid_preds = validate_one_epoch_tta(model, valid_loader, criterion, 
DEVICE) + + scheduler.step(valid_loss) + + print(f" Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}") + print(f" Valid Loss: {valid_loss:.4f}, Valid Acc: {valid_acc:.4f}") + + history['train_loss'].append(train_loss) + history['train_acc'].append(train_acc) + history['valid_loss'].append(valid_loss) + history['valid_acc'].append(valid_acc) + + if valid_acc > best_valid_acc: + print(f"Validasi akurasi meningkat dari {best_valid_acc:.4f} ke {valid_acc:.4f}. Menyimpan model...") + save_model(epoch, model, optimizer_finetune, criterion, f"{OUTPUT_DIR}/{MODEL_NAME}") + best_valid_acc = valid_acc + best_labels = valid_labels + best_preds = valid_preds + + save_plots(history['train_acc'], history['valid_acc'], history['train_loss'], history['valid_loss'], OUTPUT_DIR) + + if best_labels and best_preds: + save_confusion_matrix(best_labels, best_preds, classes, f"{OUTPUT_DIR}/confusion_matrix.png") + + print("\n--- Selesai ---") + print(f"Model terbaik disimpan di {OUTPUT_DIR}/{MODEL_NAME}") \ No newline at end of file diff --git a/.history/src/train_20250928105636.py b/.history/src/train_20250928105636.py new file mode 100644 index 0000000000000000000000000000000000000000..e3adac38427451683e5990d12dde6a7f9a175e7d --- /dev/null +++ b/.history/src/train_20250928105636.py @@ -0,0 +1,167 @@ +# src/train.py (Versi Final dengan Test-Time Augmentation) + +import torch +import torch.nn as nn +import torch.optim as optim +from tqdm import tqdm +import numpy as np +from sklearn.metrics import classification_report +from torch.utils.data import DataLoader, WeightedRandomSampler +from torch.optim.lr_scheduler import ReduceLROnPlateau + +# Impor dari file lain dalam proyek +from model import create_model +from dataset import get_dataloaders +from utils import save_model, save_plots, save_confusion_matrix + +# --- 1. 
KONFIGURASI & HYPERPARAMETERS --- +# Ini adalah parameter final yang bisa Anda gunakan +DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu' +DATA_DIR = 'data' +OUTPUT_DIR = 'outputs/new_model' +IMAGE_SIZE = 3844 # Coba naikkan ke 384 jika VRAM cukup, jangan lupa turunkan BATCH_SIZE +BATCH_SIZE = 8 # Ukuran batch yang aman untuk VRAM 4GB +NUM_WORKERS = 4 # Optimal untuk CPU 6-core +EPOCHS = 50 +LEARNING_RATE_HEAD = 1e-3 +LEARNING_RATE_FINETUNE = 3e-5 +WEIGHT_DECAY = 0.05 # Regularisasi yang sedikit lebih kuat +MODEL_NAME = 'best_model_final_TTA.pth' # Nama file baru untuk hasil final +NUM_CLASSES = 4 + +# --- 2. FUNGSI TRAINING & VALIDASI --- +def train_one_epoch(model, dataloader, optimizer, criterion, device): + model.train() + running_loss = 0.0 + correct_predictions = 0 + total_samples = 0 + + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Training"): + images, labels = images.to(device), labels.to(device) + optimizer.zero_grad() + outputs = model(images) + loss = criterion(outputs, labels) + loss.backward() + optimizer.step() + + running_loss += loss.item() * images.size(0) + _, preds = torch.max(outputs, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + return epoch_loss, epoch_acc + +def validate_one_epoch_tta(model, dataloader, criterion, device): + """ + Versi validasi yang menggunakan Test-Time Augmentation (TTA). + Memprediksi gambar asli dan versi flip, lalu merata-ratakan hasilnya. 
+ """ + model.eval() + running_loss = 0.0 + correct_predictions = 0 + total_samples = 0 + all_preds, all_labels = [], [] + + with torch.no_grad(): + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Validating with TTA"): + images, labels = images.to(device), labels.to(device) + + # TTA: Prediksi gambar asli dan versi flip horizontal + outputs_original = model(images) + outputs_flipped = model(torch.flip(images, dims=[3])) # flip horizontal + + # Rata-ratakan probabilitas hasil prediksi + outputs_avg = (torch.softmax(outputs_original, dim=1) + torch.softmax(outputs_flipped, dim=1)) / 2 + + # Hitung loss dari hasil rata-rata + loss = criterion(outputs_avg, labels) + + running_loss += loss.item() * images.size(0) + _, preds = torch.max(outputs_avg, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + all_preds.extend(preds.cpu().numpy()) + all_labels.extend(labels.cpu().numpy()) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + + print("\n--- Laporan Klasifikasi Validasi (dengan TTA) ---") + print(classification_report(all_labels, all_preds, target_names=[str(i) for i in range(NUM_CLASSES)], zero_division=0)) + + return epoch_loss, epoch_acc, all_labels, all_preds + +# --- 3. SCRIPT UTAMA --- +if __name__ == '__main__': + _, valid_loader, classes, train_dataset = get_dataloaders(DATA_DIR, BATCH_SIZE, IMAGE_SIZE, NUM_WORKERS) + + print("\n--- Menyiapkan Balanced Sampler untuk Training ---") + class_counts = np.bincount(train_dataset.targets) + class_weights = 1. 
/ torch.tensor(class_counts, dtype=torch.float) + sample_weights = class_weights[train_dataset.targets] + sampler = WeightedRandomSampler(weights=sample_weights, num_samples=len(sample_weights), replacement=True) + train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, sampler=sampler, num_workers=NUM_WORKERS) + + model = create_model(num_classes=NUM_CLASSES).to(DEVICE) + + # Menggunakan CrossEntropyLoss dengan Label Smoothing untuk regularisasi + criterion = nn.CrossEntropyLoss(label_smoothing=0.1).to(DEVICE) + print("Menggunakan CrossEntropyLoss dengan Label Smoothing (0.1).") + + # --- TAHAP 1: FREEZE BACKBONE, LATIH HEAD --- + print("\n--- TAHAP 1: Melatih Classifier Head ---") + for param in model.parameters(): + param.requires_grad = False + for param in model.head.parameters(): + param.requires_grad = True + optimizer_head = optim.AdamW(model.head.parameters(), lr=LEARNING_RATE_HEAD, weight_decay=WEIGHT_DECAY) + for epoch in range(5): + print(f"Epoch Head {epoch+1}/5") + train_one_epoch(model, train_loader, optimizer_head, criterion, DEVICE) + validate_one_epoch_tta(model, valid_loader, criterion, DEVICE) + + # --- TAHAP 2: UNFREEZE & FINE-TUNE SELURUH MODEL --- + print("\n--- TAHAP 2: Fine-tuning Seluruh Model ---") + for param in model.parameters(): + param.requires_grad = True + optimizer_finetune = optim.AdamW(model.parameters(), lr=LEARNING_RATE_FINETUNE, weight_decay=WEIGHT_DECAY) + scheduler = ReduceLROnPlateau(optimizer_finetune, mode='min', factor=0.2, patience=5) + print("Menggunakan scheduler ReduceLROnPlateau.") + + history = {'train_loss': [], 'train_acc': [], 'valid_loss': [], 'valid_acc': []} + best_valid_acc = 0.0 + best_labels, best_preds = None, None + + for epoch in range(EPOCHS): + print(f"Epoch {epoch+1}/{EPOCHS}") + + train_loss, train_acc = train_one_epoch(model, train_loader, optimizer_finetune, criterion, DEVICE) + valid_loss, valid_acc, valid_labels, valid_preds = validate_one_epoch_tta(model, valid_loader, criterion, 
DEVICE) + + scheduler.step(valid_loss) + + print(f" Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}") + print(f" Valid Loss: {valid_loss:.4f}, Valid Acc: {valid_acc:.4f}") + + history['train_loss'].append(train_loss) + history['train_acc'].append(train_acc) + history['valid_loss'].append(valid_loss) + history['valid_acc'].append(valid_acc) + + if valid_acc > best_valid_acc: + print(f"Validasi akurasi meningkat dari {best_valid_acc:.4f} ke {valid_acc:.4f}. Menyimpan model...") + save_model(epoch, model, optimizer_finetune, criterion, f"{OUTPUT_DIR}/{MODEL_NAME}") + best_valid_acc = valid_acc + best_labels = valid_labels + best_preds = valid_preds + + save_plots(history['train_acc'], history['valid_acc'], history['train_loss'], history['valid_loss'], OUTPUT_DIR) + + if best_labels and best_preds: + save_confusion_matrix(best_labels, best_preds, classes, f"{OUTPUT_DIR}/confusion_matrix.png") + + print("\n--- Selesai ---") + print(f"Model terbaik disimpan di {OUTPUT_DIR}/{MODEL_NAME}") \ No newline at end of file diff --git a/.history/src/train_20250928105637.py b/.history/src/train_20250928105637.py new file mode 100644 index 0000000000000000000000000000000000000000..9e0951e0ed0aac7dd81f7ab9683f8b28010323bd --- /dev/null +++ b/.history/src/train_20250928105637.py @@ -0,0 +1,167 @@ +# src/train.py (Versi Final dengan Test-Time Augmentation) + +import torch +import torch.nn as nn +import torch.optim as optim +from tqdm import tqdm +import numpy as np +from sklearn.metrics import classification_report +from torch.utils.data import DataLoader, WeightedRandomSampler +from torch.optim.lr_scheduler import ReduceLROnPlateau + +# Impor dari file lain dalam proyek +from model import create_model +from dataset import get_dataloaders +from utils import save_model, save_plots, save_confusion_matrix + +# --- 1. 
KONFIGURASI & HYPERPARAMETERS --- +# Ini adalah parameter final yang bisa Anda gunakan +DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu' +DATA_DIR = 'data' +OUTPUT_DIR = 'outputs/new_model' +IMAGE_SIZE = 384 # Coba naikkan ke 384 jika VRAM cukup, jangan lupa turunkan BATCH_SIZE +BATCH_SIZE = 8 # Ukuran batch yang aman untuk VRAM 4GB +NUM_WORKERS = 4 # Optimal untuk CPU 6-core +EPOCHS = 50 +LEARNING_RATE_HEAD = 1e-3 +LEARNING_RATE_FINETUNE = 3e-5 +WEIGHT_DECAY = 0.05 # Regularisasi yang sedikit lebih kuat +MODEL_NAME = 'best_model_final_TTA.pth' # Nama file baru untuk hasil final +NUM_CLASSES = 4 + +# --- 2. FUNGSI TRAINING & VALIDASI --- +def train_one_epoch(model, dataloader, optimizer, criterion, device): + model.train() + running_loss = 0.0 + correct_predictions = 0 + total_samples = 0 + + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Training"): + images, labels = images.to(device), labels.to(device) + optimizer.zero_grad() + outputs = model(images) + loss = criterion(outputs, labels) + loss.backward() + optimizer.step() + + running_loss += loss.item() * images.size(0) + _, preds = torch.max(outputs, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + return epoch_loss, epoch_acc + +def validate_one_epoch_tta(model, dataloader, criterion, device): + """ + Versi validasi yang menggunakan Test-Time Augmentation (TTA). + Memprediksi gambar asli dan versi flip, lalu merata-ratakan hasilnya. 
+ """ + model.eval() + running_loss = 0.0 + correct_predictions = 0 + total_samples = 0 + all_preds, all_labels = [], [] + + with torch.no_grad(): + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Validating with TTA"): + images, labels = images.to(device), labels.to(device) + + # TTA: Prediksi gambar asli dan versi flip horizontal + outputs_original = model(images) + outputs_flipped = model(torch.flip(images, dims=[3])) # flip horizontal + + # Rata-ratakan probabilitas hasil prediksi + outputs_avg = (torch.softmax(outputs_original, dim=1) + torch.softmax(outputs_flipped, dim=1)) / 2 + + # Hitung loss dari hasil rata-rata + loss = criterion(outputs_avg, labels) + + running_loss += loss.item() * images.size(0) + _, preds = torch.max(outputs_avg, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + all_preds.extend(preds.cpu().numpy()) + all_labels.extend(labels.cpu().numpy()) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + + print("\n--- Laporan Klasifikasi Validasi (dengan TTA) ---") + print(classification_report(all_labels, all_preds, target_names=[str(i) for i in range(NUM_CLASSES)], zero_division=0)) + + return epoch_loss, epoch_acc, all_labels, all_preds + +# --- 3. SCRIPT UTAMA --- +if __name__ == '__main__': + _, valid_loader, classes, train_dataset = get_dataloaders(DATA_DIR, BATCH_SIZE, IMAGE_SIZE, NUM_WORKERS) + + print("\n--- Menyiapkan Balanced Sampler untuk Training ---") + class_counts = np.bincount(train_dataset.targets) + class_weights = 1. 
/ torch.tensor(class_counts, dtype=torch.float) + sample_weights = class_weights[train_dataset.targets] + sampler = WeightedRandomSampler(weights=sample_weights, num_samples=len(sample_weights), replacement=True) + train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, sampler=sampler, num_workers=NUM_WORKERS) + + model = create_model(num_classes=NUM_CLASSES).to(DEVICE) + + # Menggunakan CrossEntropyLoss dengan Label Smoothing untuk regularisasi + criterion = nn.CrossEntropyLoss(label_smoothing=0.1).to(DEVICE) + print("Menggunakan CrossEntropyLoss dengan Label Smoothing (0.1).") + + # --- TAHAP 1: FREEZE BACKBONE, LATIH HEAD --- + print("\n--- TAHAP 1: Melatih Classifier Head ---") + for param in model.parameters(): + param.requires_grad = False + for param in model.head.parameters(): + param.requires_grad = True + optimizer_head = optim.AdamW(model.head.parameters(), lr=LEARNING_RATE_HEAD, weight_decay=WEIGHT_DECAY) + for epoch in range(5): + print(f"Epoch Head {epoch+1}/5") + train_one_epoch(model, train_loader, optimizer_head, criterion, DEVICE) + validate_one_epoch_tta(model, valid_loader, criterion, DEVICE) + + # --- TAHAP 2: UNFREEZE & FINE-TUNE SELURUH MODEL --- + print("\n--- TAHAP 2: Fine-tuning Seluruh Model ---") + for param in model.parameters(): + param.requires_grad = True + optimizer_finetune = optim.AdamW(model.parameters(), lr=LEARNING_RATE_FINETUNE, weight_decay=WEIGHT_DECAY) + scheduler = ReduceLROnPlateau(optimizer_finetune, mode='min', factor=0.2, patience=5) + print("Menggunakan scheduler ReduceLROnPlateau.") + + history = {'train_loss': [], 'train_acc': [], 'valid_loss': [], 'valid_acc': []} + best_valid_acc = 0.0 + best_labels, best_preds = None, None + + for epoch in range(EPOCHS): + print(f"Epoch {epoch+1}/{EPOCHS}") + + train_loss, train_acc = train_one_epoch(model, train_loader, optimizer_finetune, criterion, DEVICE) + valid_loss, valid_acc, valid_labels, valid_preds = validate_one_epoch_tta(model, valid_loader, criterion, 
DEVICE) + + scheduler.step(valid_loss) + + print(f" Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}") + print(f" Valid Loss: {valid_loss:.4f}, Valid Acc: {valid_acc:.4f}") + + history['train_loss'].append(train_loss) + history['train_acc'].append(train_acc) + history['valid_loss'].append(valid_loss) + history['valid_acc'].append(valid_acc) + + if valid_acc > best_valid_acc: + print(f"Validasi akurasi meningkat dari {best_valid_acc:.4f} ke {valid_acc:.4f}. Menyimpan model...") + save_model(epoch, model, optimizer_finetune, criterion, f"{OUTPUT_DIR}/{MODEL_NAME}") + best_valid_acc = valid_acc + best_labels = valid_labels + best_preds = valid_preds + + save_plots(history['train_acc'], history['valid_acc'], history['train_loss'], history['valid_loss'], OUTPUT_DIR) + + if best_labels and best_preds: + save_confusion_matrix(best_labels, best_preds, classes, f"{OUTPUT_DIR}/confusion_matrix.png") + + print("\n--- Selesai ---") + print(f"Model terbaik disimpan di {OUTPUT_DIR}/{MODEL_NAME}") \ No newline at end of file diff --git a/.history/src/train_20250928105640.py b/.history/src/train_20250928105640.py new file mode 100644 index 0000000000000000000000000000000000000000..940ec1d0627fa986fc55d302d8ea2dca09b809a3 --- /dev/null +++ b/.history/src/train_20250928105640.py @@ -0,0 +1,167 @@ +# src/train.py (Versi Final dengan Test-Time Augmentation) + +import torch +import torch.nn as nn +import torch.optim as optim +from tqdm import tqdm +import numpy as np +from sklearn.metrics import classification_report +from torch.utils.data import DataLoader, WeightedRandomSampler +from torch.optim.lr_scheduler import ReduceLROnPlateau + +# Impor dari file lain dalam proyek +from model import create_model +from dataset import get_dataloaders +from utils import save_model, save_plots, save_confusion_matrix + +# --- 1. 
KONFIGURASI & HYPERPARAMETERS --- +# Ini adalah parameter final yang bisa Anda gunakan +DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu' +DATA_DIR = 'data' +OUTPUT_DIR = 'outputs/new_model' +IMAGE_SIZE = 384 # Coba naikkan ke 384 jika VRAM cukup, jangan lupa turunkan BATCH_SIZE +BATCH_SIZE = 4 # Ukuran batch yang aman untuk VRAM 4GB +NUM_WORKERS = 4 # Optimal untuk CPU 6-core +EPOCHS = 50 +LEARNING_RATE_HEAD = 1e-3 +LEARNING_RATE_FINETUNE = 3e-5 +WEIGHT_DECAY = 0.05 # Regularisasi yang sedikit lebih kuat +MODEL_NAME = 'best_model_final_TTA.pth' # Nama file baru untuk hasil final +NUM_CLASSES = 4 + +# --- 2. FUNGSI TRAINING & VALIDASI --- +def train_one_epoch(model, dataloader, optimizer, criterion, device): + model.train() + running_loss = 0.0 + correct_predictions = 0 + total_samples = 0 + + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Training"): + images, labels = images.to(device), labels.to(device) + optimizer.zero_grad() + outputs = model(images) + loss = criterion(outputs, labels) + loss.backward() + optimizer.step() + + running_loss += loss.item() * images.size(0) + _, preds = torch.max(outputs, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + return epoch_loss, epoch_acc + +def validate_one_epoch_tta(model, dataloader, criterion, device): + """ + Versi validasi yang menggunakan Test-Time Augmentation (TTA). + Memprediksi gambar asli dan versi flip, lalu merata-ratakan hasilnya. 
+ """ + model.eval() + running_loss = 0.0 + correct_predictions = 0 + total_samples = 0 + all_preds, all_labels = [], [] + + with torch.no_grad(): + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Validating with TTA"): + images, labels = images.to(device), labels.to(device) + + # TTA: Prediksi gambar asli dan versi flip horizontal + outputs_original = model(images) + outputs_flipped = model(torch.flip(images, dims=[3])) # flip horizontal + + # Rata-ratakan probabilitas hasil prediksi + outputs_avg = (torch.softmax(outputs_original, dim=1) + torch.softmax(outputs_flipped, dim=1)) / 2 + + # Hitung loss dari hasil rata-rata + loss = criterion(outputs_avg, labels) + + running_loss += loss.item() * images.size(0) + _, preds = torch.max(outputs_avg, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + all_preds.extend(preds.cpu().numpy()) + all_labels.extend(labels.cpu().numpy()) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + + print("\n--- Laporan Klasifikasi Validasi (dengan TTA) ---") + print(classification_report(all_labels, all_preds, target_names=[str(i) for i in range(NUM_CLASSES)], zero_division=0)) + + return epoch_loss, epoch_acc, all_labels, all_preds + +# --- 3. SCRIPT UTAMA --- +if __name__ == '__main__': + _, valid_loader, classes, train_dataset = get_dataloaders(DATA_DIR, BATCH_SIZE, IMAGE_SIZE, NUM_WORKERS) + + print("\n--- Menyiapkan Balanced Sampler untuk Training ---") + class_counts = np.bincount(train_dataset.targets) + class_weights = 1. 
/ torch.tensor(class_counts, dtype=torch.float) + sample_weights = class_weights[train_dataset.targets] + sampler = WeightedRandomSampler(weights=sample_weights, num_samples=len(sample_weights), replacement=True) + train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, sampler=sampler, num_workers=NUM_WORKERS) + + model = create_model(num_classes=NUM_CLASSES).to(DEVICE) + + # Menggunakan CrossEntropyLoss dengan Label Smoothing untuk regularisasi + criterion = nn.CrossEntropyLoss(label_smoothing=0.1).to(DEVICE) + print("Menggunakan CrossEntropyLoss dengan Label Smoothing (0.1).") + + # --- TAHAP 1: FREEZE BACKBONE, LATIH HEAD --- + print("\n--- TAHAP 1: Melatih Classifier Head ---") + for param in model.parameters(): + param.requires_grad = False + for param in model.head.parameters(): + param.requires_grad = True + optimizer_head = optim.AdamW(model.head.parameters(), lr=LEARNING_RATE_HEAD, weight_decay=WEIGHT_DECAY) + for epoch in range(5): + print(f"Epoch Head {epoch+1}/5") + train_one_epoch(model, train_loader, optimizer_head, criterion, DEVICE) + validate_one_epoch_tta(model, valid_loader, criterion, DEVICE) + + # --- TAHAP 2: UNFREEZE & FINE-TUNE SELURUH MODEL --- + print("\n--- TAHAP 2: Fine-tuning Seluruh Model ---") + for param in model.parameters(): + param.requires_grad = True + optimizer_finetune = optim.AdamW(model.parameters(), lr=LEARNING_RATE_FINETUNE, weight_decay=WEIGHT_DECAY) + scheduler = ReduceLROnPlateau(optimizer_finetune, mode='min', factor=0.2, patience=5) + print("Menggunakan scheduler ReduceLROnPlateau.") + + history = {'train_loss': [], 'train_acc': [], 'valid_loss': [], 'valid_acc': []} + best_valid_acc = 0.0 + best_labels, best_preds = None, None + + for epoch in range(EPOCHS): + print(f"Epoch {epoch+1}/{EPOCHS}") + + train_loss, train_acc = train_one_epoch(model, train_loader, optimizer_finetune, criterion, DEVICE) + valid_loss, valid_acc, valid_labels, valid_preds = validate_one_epoch_tta(model, valid_loader, criterion, 
DEVICE) + + scheduler.step(valid_loss) + + print(f" Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}") + print(f" Valid Loss: {valid_loss:.4f}, Valid Acc: {valid_acc:.4f}") + + history['train_loss'].append(train_loss) + history['train_acc'].append(train_acc) + history['valid_loss'].append(valid_loss) + history['valid_acc'].append(valid_acc) + + if valid_acc > best_valid_acc: + print(f"Validasi akurasi meningkat dari {best_valid_acc:.4f} ke {valid_acc:.4f}. Menyimpan model...") + save_model(epoch, model, optimizer_finetune, criterion, f"{OUTPUT_DIR}/{MODEL_NAME}") + best_valid_acc = valid_acc + best_labels = valid_labels + best_preds = valid_preds + + save_plots(history['train_acc'], history['valid_acc'], history['train_loss'], history['valid_loss'], OUTPUT_DIR) + + if best_labels and best_preds: + save_confusion_matrix(best_labels, best_preds, classes, f"{OUTPUT_DIR}/confusion_matrix.png") + + print("\n--- Selesai ---") + print(f"Model terbaik disimpan di {OUTPUT_DIR}/{MODEL_NAME}") \ No newline at end of file diff --git a/.history/src/train_20250928105822.py b/.history/src/train_20250928105822.py new file mode 100644 index 0000000000000000000000000000000000000000..ea01801570864d6b289449384c49ab1b6e36f8b5 --- /dev/null +++ b/.history/src/train_20250928105822.py @@ -0,0 +1,167 @@ +# src/train.py (Versi Final dengan Test-Time Augmentation) + +import torch +import torch.nn as nn +import torch.optim as optim +from tqdm import tqdm +import numpy as np +from sklearn.metrics import classification_report +from torch.utils.data import DataLoader, WeightedRandomSampler +from torch.optim.lr_scheduler import ReduceLROnPlateau + +# Impor dari file lain dalam proyek +from model import create_model +from dataset import get_dataloaders +from utils import save_model, save_plots, save_confusion_matrix + +# --- 1. 
KONFIGURASI & HYPERPARAMETERS --- +# Ini adalah parameter final yang bisa Anda gunakan +DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu' +DATA_DIR = 'data' +OUTPUT_DIR = 'outputs/new_model' +IMAGE_SIZE = 244 # Coba naikkan ke 384 jika VRAM cukup, jangan lupa turunkan BATCH_SIZE +BATCH_SIZE = 4 # Ukuran batch yang aman untuk VRAM 4GB +NUM_WORKERS = 4 # Optimal untuk CPU 6-core +EPOCHS = 50 +LEARNING_RATE_HEAD = 1e-3 +LEARNING_RATE_FINETUNE = 3e-5 +WEIGHT_DECAY = 0.05 # Regularisasi yang sedikit lebih kuat +MODEL_NAME = 'best_model_final_TTA.pth' # Nama file baru untuk hasil final +NUM_CLASSES = 4 + +# --- 2. FUNGSI TRAINING & VALIDASI --- +def train_one_epoch(model, dataloader, optimizer, criterion, device): + model.train() + running_loss = 0.0 + correct_predictions = 0 + total_samples = 0 + + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Training"): + images, labels = images.to(device), labels.to(device) + optimizer.zero_grad() + outputs = model(images) + loss = criterion(outputs, labels) + loss.backward() + optimizer.step() + + running_loss += loss.item() * images.size(0) + _, preds = torch.max(outputs, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + return epoch_loss, epoch_acc + +def validate_one_epoch_tta(model, dataloader, criterion, device): + """ + Versi validasi yang menggunakan Test-Time Augmentation (TTA). + Memprediksi gambar asli dan versi flip, lalu merata-ratakan hasilnya. 
+ """ + model.eval() + running_loss = 0.0 + correct_predictions = 0 + total_samples = 0 + all_preds, all_labels = [], [] + + with torch.no_grad(): + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Validating with TTA"): + images, labels = images.to(device), labels.to(device) + + # TTA: Prediksi gambar asli dan versi flip horizontal + outputs_original = model(images) + outputs_flipped = model(torch.flip(images, dims=[3])) # flip horizontal + + # Rata-ratakan probabilitas hasil prediksi + outputs_avg = (torch.softmax(outputs_original, dim=1) + torch.softmax(outputs_flipped, dim=1)) / 2 + + # Hitung loss dari hasil rata-rata + loss = criterion(outputs_avg, labels) + + running_loss += loss.item() * images.size(0) + _, preds = torch.max(outputs_avg, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + all_preds.extend(preds.cpu().numpy()) + all_labels.extend(labels.cpu().numpy()) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + + print("\n--- Laporan Klasifikasi Validasi (dengan TTA) ---") + print(classification_report(all_labels, all_preds, target_names=[str(i) for i in range(NUM_CLASSES)], zero_division=0)) + + return epoch_loss, epoch_acc, all_labels, all_preds + +# --- 3. SCRIPT UTAMA --- +if __name__ == '__main__': + _, valid_loader, classes, train_dataset = get_dataloaders(DATA_DIR, BATCH_SIZE, IMAGE_SIZE, NUM_WORKERS) + + print("\n--- Menyiapkan Balanced Sampler untuk Training ---") + class_counts = np.bincount(train_dataset.targets) + class_weights = 1. 
/ torch.tensor(class_counts, dtype=torch.float) + sample_weights = class_weights[train_dataset.targets] + sampler = WeightedRandomSampler(weights=sample_weights, num_samples=len(sample_weights), replacement=True) + train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, sampler=sampler, num_workers=NUM_WORKERS) + + model = create_model(num_classes=NUM_CLASSES).to(DEVICE) + + # Menggunakan CrossEntropyLoss dengan Label Smoothing untuk regularisasi + criterion = nn.CrossEntropyLoss(label_smoothing=0.1).to(DEVICE) + print("Menggunakan CrossEntropyLoss dengan Label Smoothing (0.1).") + + # --- TAHAP 1: FREEZE BACKBONE, LATIH HEAD --- + print("\n--- TAHAP 1: Melatih Classifier Head ---") + for param in model.parameters(): + param.requires_grad = False + for param in model.head.parameters(): + param.requires_grad = True + optimizer_head = optim.AdamW(model.head.parameters(), lr=LEARNING_RATE_HEAD, weight_decay=WEIGHT_DECAY) + for epoch in range(5): + print(f"Epoch Head {epoch+1}/5") + train_one_epoch(model, train_loader, optimizer_head, criterion, DEVICE) + validate_one_epoch_tta(model, valid_loader, criterion, DEVICE) + + # --- TAHAP 2: UNFREEZE & FINE-TUNE SELURUH MODEL --- + print("\n--- TAHAP 2: Fine-tuning Seluruh Model ---") + for param in model.parameters(): + param.requires_grad = True + optimizer_finetune = optim.AdamW(model.parameters(), lr=LEARNING_RATE_FINETUNE, weight_decay=WEIGHT_DECAY) + scheduler = ReduceLROnPlateau(optimizer_finetune, mode='min', factor=0.2, patience=5) + print("Menggunakan scheduler ReduceLROnPlateau.") + + history = {'train_loss': [], 'train_acc': [], 'valid_loss': [], 'valid_acc': []} + best_valid_acc = 0.0 + best_labels, best_preds = None, None + + for epoch in range(EPOCHS): + print(f"Epoch {epoch+1}/{EPOCHS}") + + train_loss, train_acc = train_one_epoch(model, train_loader, optimizer_finetune, criterion, DEVICE) + valid_loss, valid_acc, valid_labels, valid_preds = validate_one_epoch_tta(model, valid_loader, criterion, 
DEVICE) + + scheduler.step(valid_loss) + + print(f" Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}") + print(f" Valid Loss: {valid_loss:.4f}, Valid Acc: {valid_acc:.4f}") + + history['train_loss'].append(train_loss) + history['train_acc'].append(train_acc) + history['valid_loss'].append(valid_loss) + history['valid_acc'].append(valid_acc) + + if valid_acc > best_valid_acc: + print(f"Validasi akurasi meningkat dari {best_valid_acc:.4f} ke {valid_acc:.4f}. Menyimpan model...") + save_model(epoch, model, optimizer_finetune, criterion, f"{OUTPUT_DIR}/{MODEL_NAME}") + best_valid_acc = valid_acc + best_labels = valid_labels + best_preds = valid_preds + + save_plots(history['train_acc'], history['valid_acc'], history['train_loss'], history['valid_loss'], OUTPUT_DIR) + + if best_labels and best_preds: + save_confusion_matrix(best_labels, best_preds, classes, f"{OUTPUT_DIR}/confusion_matrix.png") + + print("\n--- Selesai ---") + print(f"Model terbaik disimpan di {OUTPUT_DIR}/{MODEL_NAME}") \ No newline at end of file diff --git a/.history/src/train_20250928105825.py b/.history/src/train_20250928105825.py new file mode 100644 index 0000000000000000000000000000000000000000..f44e6ec8ff49601ad9e3929bc4cfc12e301510fa --- /dev/null +++ b/.history/src/train_20250928105825.py @@ -0,0 +1,167 @@ +# src/train.py (Versi Final dengan Test-Time Augmentation) + +import torch +import torch.nn as nn +import torch.optim as optim +from tqdm import tqdm +import numpy as np +from sklearn.metrics import classification_report +from torch.utils.data import DataLoader, WeightedRandomSampler +from torch.optim.lr_scheduler import ReduceLROnPlateau + +# Impor dari file lain dalam proyek +from model import create_model +from dataset import get_dataloaders +from utils import save_model, save_plots, save_confusion_matrix + +# --- 1. 
KONFIGURASI & HYPERPARAMETERS --- +# Ini adalah parameter final yang bisa Anda gunakan +DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu' +DATA_DIR = 'data' +OUTPUT_DIR = 'outputs/new_model' +IMAGE_SIZE = 224 # Coba naikkan ke 384 jika VRAM cukup, jangan lupa turunkan BATCH_SIZE +BATCH_SIZE = 4 # Ukuran batch yang aman untuk VRAM 4GB +NUM_WORKERS = 4 # Optimal untuk CPU 6-core +EPOCHS = 50 +LEARNING_RATE_HEAD = 1e-3 +LEARNING_RATE_FINETUNE = 3e-5 +WEIGHT_DECAY = 0.05 # Regularisasi yang sedikit lebih kuat +MODEL_NAME = 'best_model_final_TTA.pth' # Nama file baru untuk hasil final +NUM_CLASSES = 4 + +# --- 2. FUNGSI TRAINING & VALIDASI --- +def train_one_epoch(model, dataloader, optimizer, criterion, device): + model.train() + running_loss = 0.0 + correct_predictions = 0 + total_samples = 0 + + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Training"): + images, labels = images.to(device), labels.to(device) + optimizer.zero_grad() + outputs = model(images) + loss = criterion(outputs, labels) + loss.backward() + optimizer.step() + + running_loss += loss.item() * images.size(0) + _, preds = torch.max(outputs, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + return epoch_loss, epoch_acc + +def validate_one_epoch_tta(model, dataloader, criterion, device): + """ + Versi validasi yang menggunakan Test-Time Augmentation (TTA). + Memprediksi gambar asli dan versi flip, lalu merata-ratakan hasilnya. 
+ """ + model.eval() + running_loss = 0.0 + correct_predictions = 0 + total_samples = 0 + all_preds, all_labels = [], [] + + with torch.no_grad(): + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Validating with TTA"): + images, labels = images.to(device), labels.to(device) + + # TTA: Prediksi gambar asli dan versi flip horizontal + outputs_original = model(images) + outputs_flipped = model(torch.flip(images, dims=[3])) # flip horizontal + + # Rata-ratakan probabilitas hasil prediksi + outputs_avg = (torch.softmax(outputs_original, dim=1) + torch.softmax(outputs_flipped, dim=1)) / 2 + + # Hitung loss dari hasil rata-rata + loss = criterion(outputs_avg, labels) + + running_loss += loss.item() * images.size(0) + _, preds = torch.max(outputs_avg, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + all_preds.extend(preds.cpu().numpy()) + all_labels.extend(labels.cpu().numpy()) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + + print("\n--- Laporan Klasifikasi Validasi (dengan TTA) ---") + print(classification_report(all_labels, all_preds, target_names=[str(i) for i in range(NUM_CLASSES)], zero_division=0)) + + return epoch_loss, epoch_acc, all_labels, all_preds + +# --- 3. SCRIPT UTAMA --- +if __name__ == '__main__': + _, valid_loader, classes, train_dataset = get_dataloaders(DATA_DIR, BATCH_SIZE, IMAGE_SIZE, NUM_WORKERS) + + print("\n--- Menyiapkan Balanced Sampler untuk Training ---") + class_counts = np.bincount(train_dataset.targets) + class_weights = 1. 
/ torch.tensor(class_counts, dtype=torch.float) + sample_weights = class_weights[train_dataset.targets] + sampler = WeightedRandomSampler(weights=sample_weights, num_samples=len(sample_weights), replacement=True) + train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, sampler=sampler, num_workers=NUM_WORKERS) + + model = create_model(num_classes=NUM_CLASSES).to(DEVICE) + + # Menggunakan CrossEntropyLoss dengan Label Smoothing untuk regularisasi + criterion = nn.CrossEntropyLoss(label_smoothing=0.1).to(DEVICE) + print("Menggunakan CrossEntropyLoss dengan Label Smoothing (0.1).") + + # --- TAHAP 1: FREEZE BACKBONE, LATIH HEAD --- + print("\n--- TAHAP 1: Melatih Classifier Head ---") + for param in model.parameters(): + param.requires_grad = False + for param in model.head.parameters(): + param.requires_grad = True + optimizer_head = optim.AdamW(model.head.parameters(), lr=LEARNING_RATE_HEAD, weight_decay=WEIGHT_DECAY) + for epoch in range(5): + print(f"Epoch Head {epoch+1}/5") + train_one_epoch(model, train_loader, optimizer_head, criterion, DEVICE) + validate_one_epoch_tta(model, valid_loader, criterion, DEVICE) + + # --- TAHAP 2: UNFREEZE & FINE-TUNE SELURUH MODEL --- + print("\n--- TAHAP 2: Fine-tuning Seluruh Model ---") + for param in model.parameters(): + param.requires_grad = True + optimizer_finetune = optim.AdamW(model.parameters(), lr=LEARNING_RATE_FINETUNE, weight_decay=WEIGHT_DECAY) + scheduler = ReduceLROnPlateau(optimizer_finetune, mode='min', factor=0.2, patience=5) + print("Menggunakan scheduler ReduceLROnPlateau.") + + history = {'train_loss': [], 'train_acc': [], 'valid_loss': [], 'valid_acc': []} + best_valid_acc = 0.0 + best_labels, best_preds = None, None + + for epoch in range(EPOCHS): + print(f"Epoch {epoch+1}/{EPOCHS}") + + train_loss, train_acc = train_one_epoch(model, train_loader, optimizer_finetune, criterion, DEVICE) + valid_loss, valid_acc, valid_labels, valid_preds = validate_one_epoch_tta(model, valid_loader, criterion, 
DEVICE) + + scheduler.step(valid_loss) + + print(f" Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}") + print(f" Valid Loss: {valid_loss:.4f}, Valid Acc: {valid_acc:.4f}") + + history['train_loss'].append(train_loss) + history['train_acc'].append(train_acc) + history['valid_loss'].append(valid_loss) + history['valid_acc'].append(valid_acc) + + if valid_acc > best_valid_acc: + print(f"Validasi akurasi meningkat dari {best_valid_acc:.4f} ke {valid_acc:.4f}. Menyimpan model...") + save_model(epoch, model, optimizer_finetune, criterion, f"{OUTPUT_DIR}/{MODEL_NAME}") + best_valid_acc = valid_acc + best_labels = valid_labels + best_preds = valid_preds + + save_plots(history['train_acc'], history['valid_acc'], history['train_loss'], history['valid_loss'], OUTPUT_DIR) + + if best_labels and best_preds: + save_confusion_matrix(best_labels, best_preds, classes, f"{OUTPUT_DIR}/confusion_matrix.png") + + print("\n--- Selesai ---") + print(f"Model terbaik disimpan di {OUTPUT_DIR}/{MODEL_NAME}") \ No newline at end of file diff --git a/.history/src/train_20250928105829.py b/.history/src/train_20250928105829.py new file mode 100644 index 0000000000000000000000000000000000000000..a8b2c61fb13bc2debf9a8d8a004084540b516ca9 --- /dev/null +++ b/.history/src/train_20250928105829.py @@ -0,0 +1,167 @@ +# src/train.py (Versi Final dengan Test-Time Augmentation) + +import torch +import torch.nn as nn +import torch.optim as optim +from tqdm import tqdm +import numpy as np +from sklearn.metrics import classification_report +from torch.utils.data import DataLoader, WeightedRandomSampler +from torch.optim.lr_scheduler import ReduceLROnPlateau + +# Impor dari file lain dalam proyek +from model import create_model +from dataset import get_dataloaders +from utils import save_model, save_plots, save_confusion_matrix + +# --- 1. 
KONFIGURASI & HYPERPARAMETERS --- +# Ini adalah parameter final yang bisa Anda gunakan +DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu' +DATA_DIR = 'data' +OUTPUT_DIR = 'outputs/new_model' +IMAGE_SIZE = 224 # Coba naikkan ke 384 jika VRAM cukup, jangan lupa turunkan BATCH_SIZE +BATCH_SIZE = 8 # Ukuran batch yang aman untuk VRAM 4GB +NUM_WORKERS = 4 # Optimal untuk CPU 6-core +EPOCHS = 50 +LEARNING_RATE_HEAD = 1e-3 +LEARNING_RATE_FINETUNE = 3e-5 +WEIGHT_DECAY = 0.05 # Regularisasi yang sedikit lebih kuat +MODEL_NAME = 'best_model_final_TTA.pth' # Nama file baru untuk hasil final +NUM_CLASSES = 4 + +# --- 2. FUNGSI TRAINING & VALIDASI --- +def train_one_epoch(model, dataloader, optimizer, criterion, device): + model.train() + running_loss = 0.0 + correct_predictions = 0 + total_samples = 0 + + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Training"): + images, labels = images.to(device), labels.to(device) + optimizer.zero_grad() + outputs = model(images) + loss = criterion(outputs, labels) + loss.backward() + optimizer.step() + + running_loss += loss.item() * images.size(0) + _, preds = torch.max(outputs, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + return epoch_loss, epoch_acc + +def validate_one_epoch_tta(model, dataloader, criterion, device): + """ + Versi validasi yang menggunakan Test-Time Augmentation (TTA). + Memprediksi gambar asli dan versi flip, lalu merata-ratakan hasilnya. 
+ """ + model.eval() + running_loss = 0.0 + correct_predictions = 0 + total_samples = 0 + all_preds, all_labels = [], [] + + with torch.no_grad(): + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Validating with TTA"): + images, labels = images.to(device), labels.to(device) + + # TTA: Prediksi gambar asli dan versi flip horizontal + outputs_original = model(images) + outputs_flipped = model(torch.flip(images, dims=[3])) # flip horizontal + + # Rata-ratakan probabilitas hasil prediksi + outputs_avg = (torch.softmax(outputs_original, dim=1) + torch.softmax(outputs_flipped, dim=1)) / 2 + + # Hitung loss dari hasil rata-rata + loss = criterion(outputs_avg, labels) + + running_loss += loss.item() * images.size(0) + _, preds = torch.max(outputs_avg, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + all_preds.extend(preds.cpu().numpy()) + all_labels.extend(labels.cpu().numpy()) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + + print("\n--- Laporan Klasifikasi Validasi (dengan TTA) ---") + print(classification_report(all_labels, all_preds, target_names=[str(i) for i in range(NUM_CLASSES)], zero_division=0)) + + return epoch_loss, epoch_acc, all_labels, all_preds + +# --- 3. SCRIPT UTAMA --- +if __name__ == '__main__': + _, valid_loader, classes, train_dataset = get_dataloaders(DATA_DIR, BATCH_SIZE, IMAGE_SIZE, NUM_WORKERS) + + print("\n--- Menyiapkan Balanced Sampler untuk Training ---") + class_counts = np.bincount(train_dataset.targets) + class_weights = 1. 
/ torch.tensor(class_counts, dtype=torch.float) + sample_weights = class_weights[train_dataset.targets] + sampler = WeightedRandomSampler(weights=sample_weights, num_samples=len(sample_weights), replacement=True) + train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, sampler=sampler, num_workers=NUM_WORKERS) + + model = create_model(num_classes=NUM_CLASSES).to(DEVICE) + + # Menggunakan CrossEntropyLoss dengan Label Smoothing untuk regularisasi + criterion = nn.CrossEntropyLoss(label_smoothing=0.1).to(DEVICE) + print("Menggunakan CrossEntropyLoss dengan Label Smoothing (0.1).") + + # --- TAHAP 1: FREEZE BACKBONE, LATIH HEAD --- + print("\n--- TAHAP 1: Melatih Classifier Head ---") + for param in model.parameters(): + param.requires_grad = False + for param in model.head.parameters(): + param.requires_grad = True + optimizer_head = optim.AdamW(model.head.parameters(), lr=LEARNING_RATE_HEAD, weight_decay=WEIGHT_DECAY) + for epoch in range(5): + print(f"Epoch Head {epoch+1}/5") + train_one_epoch(model, train_loader, optimizer_head, criterion, DEVICE) + validate_one_epoch_tta(model, valid_loader, criterion, DEVICE) + + # --- TAHAP 2: UNFREEZE & FINE-TUNE SELURUH MODEL --- + print("\n--- TAHAP 2: Fine-tuning Seluruh Model ---") + for param in model.parameters(): + param.requires_grad = True + optimizer_finetune = optim.AdamW(model.parameters(), lr=LEARNING_RATE_FINETUNE, weight_decay=WEIGHT_DECAY) + scheduler = ReduceLROnPlateau(optimizer_finetune, mode='min', factor=0.2, patience=5) + print("Menggunakan scheduler ReduceLROnPlateau.") + + history = {'train_loss': [], 'train_acc': [], 'valid_loss': [], 'valid_acc': []} + best_valid_acc = 0.0 + best_labels, best_preds = None, None + + for epoch in range(EPOCHS): + print(f"Epoch {epoch+1}/{EPOCHS}") + + train_loss, train_acc = train_one_epoch(model, train_loader, optimizer_finetune, criterion, DEVICE) + valid_loss, valid_acc, valid_labels, valid_preds = validate_one_epoch_tta(model, valid_loader, criterion, 
DEVICE) + + scheduler.step(valid_loss) + + print(f" Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}") + print(f" Valid Loss: {valid_loss:.4f}, Valid Acc: {valid_acc:.4f}") + + history['train_loss'].append(train_loss) + history['train_acc'].append(train_acc) + history['valid_loss'].append(valid_loss) + history['valid_acc'].append(valid_acc) + + if valid_acc > best_valid_acc: + print(f"Validasi akurasi meningkat dari {best_valid_acc:.4f} ke {valid_acc:.4f}. Menyimpan model...") + save_model(epoch, model, optimizer_finetune, criterion, f"{OUTPUT_DIR}/{MODEL_NAME}") + best_valid_acc = valid_acc + best_labels = valid_labels + best_preds = valid_preds + + save_plots(history['train_acc'], history['valid_acc'], history['train_loss'], history['valid_loss'], OUTPUT_DIR) + + if best_labels and best_preds: + save_confusion_matrix(best_labels, best_preds, classes, f"{OUTPUT_DIR}/confusion_matrix.png") + + print("\n--- Selesai ---") + print(f"Model terbaik disimpan di {OUTPUT_DIR}/{MODEL_NAME}") \ No newline at end of file diff --git a/.history/src/train_20250928105842.py b/.history/src/train_20250928105842.py new file mode 100644 index 0000000000000000000000000000000000000000..940ec1d0627fa986fc55d302d8ea2dca09b809a3 --- /dev/null +++ b/.history/src/train_20250928105842.py @@ -0,0 +1,167 @@ +# src/train.py (Versi Final dengan Test-Time Augmentation) + +import torch +import torch.nn as nn +import torch.optim as optim +from tqdm import tqdm +import numpy as np +from sklearn.metrics import classification_report +from torch.utils.data import DataLoader, WeightedRandomSampler +from torch.optim.lr_scheduler import ReduceLROnPlateau + +# Impor dari file lain dalam proyek +from model import create_model +from dataset import get_dataloaders +from utils import save_model, save_plots, save_confusion_matrix + +# --- 1. 
KONFIGURASI & HYPERPARAMETERS --- +# Ini adalah parameter final yang bisa Anda gunakan +DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu' +DATA_DIR = 'data' +OUTPUT_DIR = 'outputs/new_model' +IMAGE_SIZE = 384 # Coba naikkan ke 384 jika VRAM cukup, jangan lupa turunkan BATCH_SIZE +BATCH_SIZE = 4 # Ukuran batch yang aman untuk VRAM 4GB +NUM_WORKERS = 4 # Optimal untuk CPU 6-core +EPOCHS = 50 +LEARNING_RATE_HEAD = 1e-3 +LEARNING_RATE_FINETUNE = 3e-5 +WEIGHT_DECAY = 0.05 # Regularisasi yang sedikit lebih kuat +MODEL_NAME = 'best_model_final_TTA.pth' # Nama file baru untuk hasil final +NUM_CLASSES = 4 + +# --- 2. FUNGSI TRAINING & VALIDASI --- +def train_one_epoch(model, dataloader, optimizer, criterion, device): + model.train() + running_loss = 0.0 + correct_predictions = 0 + total_samples = 0 + + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Training"): + images, labels = images.to(device), labels.to(device) + optimizer.zero_grad() + outputs = model(images) + loss = criterion(outputs, labels) + loss.backward() + optimizer.step() + + running_loss += loss.item() * images.size(0) + _, preds = torch.max(outputs, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + return epoch_loss, epoch_acc + +def validate_one_epoch_tta(model, dataloader, criterion, device): + """ + Versi validasi yang menggunakan Test-Time Augmentation (TTA). + Memprediksi gambar asli dan versi flip, lalu merata-ratakan hasilnya. 
+ """ + model.eval() + running_loss = 0.0 + correct_predictions = 0 + total_samples = 0 + all_preds, all_labels = [], [] + + with torch.no_grad(): + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Validating with TTA"): + images, labels = images.to(device), labels.to(device) + + # TTA: Prediksi gambar asli dan versi flip horizontal + outputs_original = model(images) + outputs_flipped = model(torch.flip(images, dims=[3])) # flip horizontal + + # Rata-ratakan probabilitas hasil prediksi + outputs_avg = (torch.softmax(outputs_original, dim=1) + torch.softmax(outputs_flipped, dim=1)) / 2 + + # Hitung loss dari hasil rata-rata + loss = criterion(outputs_avg, labels) + + running_loss += loss.item() * images.size(0) + _, preds = torch.max(outputs_avg, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + all_preds.extend(preds.cpu().numpy()) + all_labels.extend(labels.cpu().numpy()) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + + print("\n--- Laporan Klasifikasi Validasi (dengan TTA) ---") + print(classification_report(all_labels, all_preds, target_names=[str(i) for i in range(NUM_CLASSES)], zero_division=0)) + + return epoch_loss, epoch_acc, all_labels, all_preds + +# --- 3. SCRIPT UTAMA --- +if __name__ == '__main__': + _, valid_loader, classes, train_dataset = get_dataloaders(DATA_DIR, BATCH_SIZE, IMAGE_SIZE, NUM_WORKERS) + + print("\n--- Menyiapkan Balanced Sampler untuk Training ---") + class_counts = np.bincount(train_dataset.targets) + class_weights = 1. 
/ torch.tensor(class_counts, dtype=torch.float) + sample_weights = class_weights[train_dataset.targets] + sampler = WeightedRandomSampler(weights=sample_weights, num_samples=len(sample_weights), replacement=True) + train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, sampler=sampler, num_workers=NUM_WORKERS) + + model = create_model(num_classes=NUM_CLASSES).to(DEVICE) + + # Menggunakan CrossEntropyLoss dengan Label Smoothing untuk regularisasi + criterion = nn.CrossEntropyLoss(label_smoothing=0.1).to(DEVICE) + print("Menggunakan CrossEntropyLoss dengan Label Smoothing (0.1).") + + # --- TAHAP 1: FREEZE BACKBONE, LATIH HEAD --- + print("\n--- TAHAP 1: Melatih Classifier Head ---") + for param in model.parameters(): + param.requires_grad = False + for param in model.head.parameters(): + param.requires_grad = True + optimizer_head = optim.AdamW(model.head.parameters(), lr=LEARNING_RATE_HEAD, weight_decay=WEIGHT_DECAY) + for epoch in range(5): + print(f"Epoch Head {epoch+1}/5") + train_one_epoch(model, train_loader, optimizer_head, criterion, DEVICE) + validate_one_epoch_tta(model, valid_loader, criterion, DEVICE) + + # --- TAHAP 2: UNFREEZE & FINE-TUNE SELURUH MODEL --- + print("\n--- TAHAP 2: Fine-tuning Seluruh Model ---") + for param in model.parameters(): + param.requires_grad = True + optimizer_finetune = optim.AdamW(model.parameters(), lr=LEARNING_RATE_FINETUNE, weight_decay=WEIGHT_DECAY) + scheduler = ReduceLROnPlateau(optimizer_finetune, mode='min', factor=0.2, patience=5) + print("Menggunakan scheduler ReduceLROnPlateau.") + + history = {'train_loss': [], 'train_acc': [], 'valid_loss': [], 'valid_acc': []} + best_valid_acc = 0.0 + best_labels, best_preds = None, None + + for epoch in range(EPOCHS): + print(f"Epoch {epoch+1}/{EPOCHS}") + + train_loss, train_acc = train_one_epoch(model, train_loader, optimizer_finetune, criterion, DEVICE) + valid_loss, valid_acc, valid_labels, valid_preds = validate_one_epoch_tta(model, valid_loader, criterion, 
DEVICE) + + scheduler.step(valid_loss) + + print(f" Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}") + print(f" Valid Loss: {valid_loss:.4f}, Valid Acc: {valid_acc:.4f}") + + history['train_loss'].append(train_loss) + history['train_acc'].append(train_acc) + history['valid_loss'].append(valid_loss) + history['valid_acc'].append(valid_acc) + + if valid_acc > best_valid_acc: + print(f"Validasi akurasi meningkat dari {best_valid_acc:.4f} ke {valid_acc:.4f}. Menyimpan model...") + save_model(epoch, model, optimizer_finetune, criterion, f"{OUTPUT_DIR}/{MODEL_NAME}") + best_valid_acc = valid_acc + best_labels = valid_labels + best_preds = valid_preds + + save_plots(history['train_acc'], history['valid_acc'], history['train_loss'], history['valid_loss'], OUTPUT_DIR) + + if best_labels and best_preds: + save_confusion_matrix(best_labels, best_preds, classes, f"{OUTPUT_DIR}/confusion_matrix.png") + + print("\n--- Selesai ---") + print(f"Model terbaik disimpan di {OUTPUT_DIR}/{MODEL_NAME}") \ No newline at end of file diff --git a/.history/src/train_20250928105856.py b/.history/src/train_20250928105856.py new file mode 100644 index 0000000000000000000000000000000000000000..96bd5b00ac39aaaf6dbf08024c7528145bc0e652 --- /dev/null +++ b/.history/src/train_20250928105856.py @@ -0,0 +1,167 @@ +# src/train.py (dengan Test-Time Augmentation) + +import torch +import torch.nn as nn +import torch.optim as optim +from tqdm import tqdm +import numpy as np +from sklearn.metrics import classification_report +from torch.utils.data import DataLoader, WeightedRandomSampler +from torch.optim.lr_scheduler import ReduceLROnPlateau + +# Impor dari file lain dalam proyek +from model import create_model +from dataset import get_dataloaders +from utils import save_model, save_plots, save_confusion_matrix + +# --- 1. 
KONFIGURASI & HYPERPARAMETERS --- +# Ini adalah parameter final yang bisa Anda gunakan +DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu' +DATA_DIR = 'data' +OUTPUT_DIR = 'outputs/new_model' +IMAGE_SIZE = 384 # Coba naikkan ke 384 jika VRAM cukup, jangan lupa turunkan BATCH_SIZE +BATCH_SIZE = 4 # Ukuran batch yang aman untuk VRAM 4GB +NUM_WORKERS = 4 # Optimal untuk CPU 6-core +EPOCHS = 50 +LEARNING_RATE_HEAD = 1e-3 +LEARNING_RATE_FINETUNE = 3e-5 +WEIGHT_DECAY = 0.05 # Regularisasi yang sedikit lebih kuat +MODEL_NAME = 'best_model_final_TTA.pth' # Nama file baru untuk hasil final +NUM_CLASSES = 4 + +# --- 2. FUNGSI TRAINING & VALIDASI --- +def train_one_epoch(model, dataloader, optimizer, criterion, device): + model.train() + running_loss = 0.0 + correct_predictions = 0 + total_samples = 0 + + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Training"): + images, labels = images.to(device), labels.to(device) + optimizer.zero_grad() + outputs = model(images) + loss = criterion(outputs, labels) + loss.backward() + optimizer.step() + + running_loss += loss.item() * images.size(0) + _, preds = torch.max(outputs, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + return epoch_loss, epoch_acc + +def validate_one_epoch_tta(model, dataloader, criterion, device): + """ + Versi validasi yang menggunakan Test-Time Augmentation (TTA). + Memprediksi gambar asli dan versi flip, lalu merata-ratakan hasilnya. 
+ """ + model.eval() + running_loss = 0.0 + correct_predictions = 0 + total_samples = 0 + all_preds, all_labels = [], [] + + with torch.no_grad(): + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Validating with TTA"): + images, labels = images.to(device), labels.to(device) + + # TTA: Prediksi gambar asli dan versi flip horizontal + outputs_original = model(images) + outputs_flipped = model(torch.flip(images, dims=[3])) # flip horizontal + + # Rata-ratakan probabilitas hasil prediksi + outputs_avg = (torch.softmax(outputs_original, dim=1) + torch.softmax(outputs_flipped, dim=1)) / 2 + + # Hitung loss dari hasil rata-rata + loss = criterion(outputs_avg, labels) + + running_loss += loss.item() * images.size(0) + _, preds = torch.max(outputs_avg, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + all_preds.extend(preds.cpu().numpy()) + all_labels.extend(labels.cpu().numpy()) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + + print("\n--- Laporan Klasifikasi Validasi (dengan TTA) ---") + print(classification_report(all_labels, all_preds, target_names=[str(i) for i in range(NUM_CLASSES)], zero_division=0)) + + return epoch_loss, epoch_acc, all_labels, all_preds + +# --- 3. SCRIPT UTAMA --- +if __name__ == '__main__': + _, valid_loader, classes, train_dataset = get_dataloaders(DATA_DIR, BATCH_SIZE, IMAGE_SIZE, NUM_WORKERS) + + print("\n--- Menyiapkan Balanced Sampler untuk Training ---") + class_counts = np.bincount(train_dataset.targets) + class_weights = 1. 
/ torch.tensor(class_counts, dtype=torch.float) + sample_weights = class_weights[train_dataset.targets] + sampler = WeightedRandomSampler(weights=sample_weights, num_samples=len(sample_weights), replacement=True) + train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, sampler=sampler, num_workers=NUM_WORKERS) + + model = create_model(num_classes=NUM_CLASSES).to(DEVICE) + + # Menggunakan CrossEntropyLoss dengan Label Smoothing untuk regularisasi + criterion = nn.CrossEntropyLoss(label_smoothing=0.1).to(DEVICE) + print("Menggunakan CrossEntropyLoss dengan Label Smoothing (0.1).") + + # --- TAHAP 1: FREEZE BACKBONE, LATIH HEAD --- + print("\n--- TAHAP 1: Melatih Classifier Head ---") + for param in model.parameters(): + param.requires_grad = False + for param in model.head.parameters(): + param.requires_grad = True + optimizer_head = optim.AdamW(model.head.parameters(), lr=LEARNING_RATE_HEAD, weight_decay=WEIGHT_DECAY) + for epoch in range(5): + print(f"Epoch Head {epoch+1}/5") + train_one_epoch(model, train_loader, optimizer_head, criterion, DEVICE) + validate_one_epoch_tta(model, valid_loader, criterion, DEVICE) + + # --- TAHAP 2: UNFREEZE & FINE-TUNE SELURUH MODEL --- + print("\n--- TAHAP 2: Fine-tuning Seluruh Model ---") + for param in model.parameters(): + param.requires_grad = True + optimizer_finetune = optim.AdamW(model.parameters(), lr=LEARNING_RATE_FINETUNE, weight_decay=WEIGHT_DECAY) + scheduler = ReduceLROnPlateau(optimizer_finetune, mode='min', factor=0.2, patience=5) + print("Menggunakan scheduler ReduceLROnPlateau.") + + history = {'train_loss': [], 'train_acc': [], 'valid_loss': [], 'valid_acc': []} + best_valid_acc = 0.0 + best_labels, best_preds = None, None + + for epoch in range(EPOCHS): + print(f"Epoch {epoch+1}/{EPOCHS}") + + train_loss, train_acc = train_one_epoch(model, train_loader, optimizer_finetune, criterion, DEVICE) + valid_loss, valid_acc, valid_labels, valid_preds = validate_one_epoch_tta(model, valid_loader, criterion, 
DEVICE) + + scheduler.step(valid_loss) + + print(f" Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}") + print(f" Valid Loss: {valid_loss:.4f}, Valid Acc: {valid_acc:.4f}") + + history['train_loss'].append(train_loss) + history['train_acc'].append(train_acc) + history['valid_loss'].append(valid_loss) + history['valid_acc'].append(valid_acc) + + if valid_acc > best_valid_acc: + print(f"Validasi akurasi meningkat dari {best_valid_acc:.4f} ke {valid_acc:.4f}. Menyimpan model...") + save_model(epoch, model, optimizer_finetune, criterion, f"{OUTPUT_DIR}/{MODEL_NAME}") + best_valid_acc = valid_acc + best_labels = valid_labels + best_preds = valid_preds + + save_plots(history['train_acc'], history['valid_acc'], history['train_loss'], history['valid_loss'], OUTPUT_DIR) + + if best_labels and best_preds: + save_confusion_matrix(best_labels, best_preds, classes, f"{OUTPUT_DIR}/confusion_matrix.png") + + print("\n--- Selesai ---") + print(f"Model terbaik disimpan di {OUTPUT_DIR}/{MODEL_NAME}") \ No newline at end of file diff --git a/.history/src/train_20250928110010.py b/.history/src/train_20250928110010.py new file mode 100644 index 0000000000000000000000000000000000000000..b1f3eac879e46050ae31a74b521838bc2683dae7 --- /dev/null +++ b/.history/src/train_20250928110010.py @@ -0,0 +1,167 @@ +# src/train.py (dengan Test-Time Augmentation) + +import torch +import torch.nn as nn +import torch.optim as optim +from tqdm import tqdm +import numpy as np +from sklearn.metrics import classification_report +from torch.utils.data import DataLoader, WeightedRandomSampler +from torch.optim.lr_scheduler import ReduceLROnPlateau + +# Impor dari file lain dalam proyek +from model import create_model +from dataset import get_dataloaders +from utils import save_model, save_plots, save_confusion_matrix + +# --- 1. 
KONFIGURASI & HYPERPARAMETERS --- +# Ini adalah parameter final yang bisa Anda gunakan +DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu' +DATA_DIR = 'data' +OUTPUT_DIR = 'outputs/new_model' +IMAGE_SIZE = 384 # Coba naikkan ke 384 jika VRAM cukup, jangan lupa turunkan BATCH_SIZE +BATCH_SIZE = 4 # Ukuran batch yang aman untuk VRAM 4GB +NUM_WORKERS = 4 # Optimal untuk CPU 6-core +EPOCHS = 50 +LEARNING_RATE_HEAD = 1e-3 +LEARNING_RATE_FINETUNE = 3e-5 +WEIGHT_DECAY = 0.05 # Regularisasi yang sedikit lebih kuat +MODEL_NAME = 'best_model_final_TTA.pth' # Nama file baru untuk hasil final +NUM_CLASSES = 4 + +# --- 2. FUNGSI TRAINING & VALIDASI --- +def train_one_epoch(model, dataloader, optimizer, criterion, device): + model.train() + running_loss = 0.0 + correct_predictions = 0 + total_samples = 0 + + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Training"): + images, labels = images.to(device), labels.to(device) + optimizer.zero_grad() + outputs = model(images) + loss = criterion(outputs, labels) + loss.backward() + optimizer.step() + + running_loss += loss.item() * images.size(0) + _, preds = torch.max(outputs, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + return epoch_loss, epoch_acc + +def validate_one_epoch_tta(model, dataloader, criterion, device): + """ + Versi validasi yang menggunakan Test-Time Augmentation (TTA). + Memprediksi gambar asli dan versi flip, lalu merata-ratakan hasilnya. 
+ """ + model.eval() + running_loss = 0.0 + correct_predictions = 0 + total_samples = 0 + all_preds, all_labels = [], [] + + with torch.no_grad(): + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Validating with TTA"): + images, labels = images.to(device), labels.to(device) + + # TTA: Prediksi gambar asli dan versi flip horizontal + outputs_original = model(images) + outputs_flipped = model(torch.flip(images, dims=[3])) # flip horizontal + + # Rata-ratakan probabilitas hasil prediksi + outputs_avg = (torch.softmax(outputs_original, dim=1) + torch.softmax(outputs_flipped, dim=1)) / 2 + + # Hitung loss dari hasil rata-rata + loss = criterion(outputs_avg, labels) + + running_loss += loss.item() * images.size(0) + _, preds = torch.max(outputs_avg, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + all_preds.extend(preds.cpu().numpy()) + all_labels.extend(labels.cpu().numpy()) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + + print("\n--- Laporan Klasifikasi Validasi (dengan TTA) ---") + print(classification_report(all_labels, all_preds, target_names=[str(i) for i in range(NUM_CLASSES)], zero_division=0)) + + return epoch_loss, epoch_acc, all_labels, all_preds + +# --- 3. SCRIPT UTAMA --- +if __name__ == '__main__': + _, valid_loader, classes, train_dataset = get_dataloaders(DATA_DIR, BATCH_SIZE, IMAGE_SIZE, NUM_WORKERS) + + print("\n--- Menyiapkan Balanced Sampler untuk Training ---") + class_counts = np.bincount(train_dataset.targets) + class_weights = 1. 
/ torch.tensor(class_counts, dtype=torch.float) + sample_weights = class_weights[train_dataset.targets] + sampler = WeightedRandomSampler(weights=sample_weights, num_samples=len(sample_weights), replacement=True) + train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, sampler=sampler, num_workers=NUM_WORKERS) + + model = create_model(num_classes=NUM_CLASSES, image_size=IMAGE_SIZE).to(DEVICE) + + # Menggunakan CrossEntropyLoss dengan Label Smoothing untuk regularisasi + criterion = nn.CrossEntropyLoss(label_smoothing=0.1).to(DEVICE) + print("Menggunakan CrossEntropyLoss dengan Label Smoothing (0.1).") + + # --- TAHAP 1: FREEZE BACKBONE, LATIH HEAD --- + print("\n--- TAHAP 1: Melatih Classifier Head ---") + for param in model.parameters(): + param.requires_grad = False + for param in model.head.parameters(): + param.requires_grad = True + optimizer_head = optim.AdamW(model.head.parameters(), lr=LEARNING_RATE_HEAD, weight_decay=WEIGHT_DECAY) + for epoch in range(5): + print(f"Epoch Head {epoch+1}/5") + train_one_epoch(model, train_loader, optimizer_head, criterion, DEVICE) + validate_one_epoch_tta(model, valid_loader, criterion, DEVICE) + + # --- TAHAP 2: UNFREEZE & FINE-TUNE SELURUH MODEL --- + print("\n--- TAHAP 2: Fine-tuning Seluruh Model ---") + for param in model.parameters(): + param.requires_grad = True + optimizer_finetune = optim.AdamW(model.parameters(), lr=LEARNING_RATE_FINETUNE, weight_decay=WEIGHT_DECAY) + scheduler = ReduceLROnPlateau(optimizer_finetune, mode='min', factor=0.2, patience=5) + print("Menggunakan scheduler ReduceLROnPlateau.") + + history = {'train_loss': [], 'train_acc': [], 'valid_loss': [], 'valid_acc': []} + best_valid_acc = 0.0 + best_labels, best_preds = None, None + + for epoch in range(EPOCHS): + print(f"Epoch {epoch+1}/{EPOCHS}") + + train_loss, train_acc = train_one_epoch(model, train_loader, optimizer_finetune, criterion, DEVICE) + valid_loss, valid_acc, valid_labels, valid_preds = validate_one_epoch_tta(model, 
valid_loader, criterion, DEVICE) + + scheduler.step(valid_loss) + + print(f" Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}") + print(f" Valid Loss: {valid_loss:.4f}, Valid Acc: {valid_acc:.4f}") + + history['train_loss'].append(train_loss) + history['train_acc'].append(train_acc) + history['valid_loss'].append(valid_loss) + history['valid_acc'].append(valid_acc) + + if valid_acc > best_valid_acc: + print(f"Validasi akurasi meningkat dari {best_valid_acc:.4f} ke {valid_acc:.4f}. Menyimpan model...") + save_model(epoch, model, optimizer_finetune, criterion, f"{OUTPUT_DIR}/{MODEL_NAME}") + best_valid_acc = valid_acc + best_labels = valid_labels + best_preds = valid_preds + + save_plots(history['train_acc'], history['valid_acc'], history['train_loss'], history['valid_loss'], OUTPUT_DIR) + + if best_labels and best_preds: + save_confusion_matrix(best_labels, best_preds, classes, f"{OUTPUT_DIR}/confusion_matrix.png") + + print("\n--- Selesai ---") + print(f"Model terbaik disimpan di {OUTPUT_DIR}/{MODEL_NAME}") \ No newline at end of file diff --git a/.history/src/train_20250929123148.py b/.history/src/train_20250929123148.py new file mode 100644 index 0000000000000000000000000000000000000000..a553f5a3471e3aebd989d678b8a0813b162e9988 --- /dev/null +++ b/.history/src/train_20250929123148.py @@ -0,0 +1,167 @@ +# src/train.py (dengan Test-Time Augmentation) + +import torch +import torch.nn as nn +import torch.optim as optim +from tqdm import tqdm +import numpy as np +from sklearn.metrics import classification_report +from torch.utils.data import DataLoader, WeightedRandomSampler +from torch.optim.lr_scheduler import ReduceLROnPlateau + +# Impor dari file lain dalam proyek +from model import create_model +from dataset import get_dataloaders +from utils import save_model, save_plots, save_confusion_matrix + +# --- 1. 
KONFIGURASI & HYPERPARAMETERS --- +# Ini adalah parameter final yang bisa Anda gunakan +DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu' +DATA_DIR = 'data' +OUTPUT_DIR = 'outputs/new_model' +IMAGE_SIZE = # Coba naikkan ke 384 jika VRAM cukup, jangan lupa turunkan BATCH_SIZE +BATCH_SIZE = 4 # Ukuran batch yang aman untuk VRAM 4GB +NUM_WORKERS = 4 # Optimal untuk CPU 6-core +EPOCHS = 50 +LEARNING_RATE_HEAD = 1e-3 +LEARNING_RATE_FINETUNE = 3e-5 +WEIGHT_DECAY = 0.05 # Regularisasi yang sedikit lebih kuat +MODEL_NAME = 'best_model_final_TTA.pth' # Nama file baru untuk hasil final +NUM_CLASSES = 4 + +# --- 2. FUNGSI TRAINING & VALIDASI --- +def train_one_epoch(model, dataloader, optimizer, criterion, device): + model.train() + running_loss = 0.0 + correct_predictions = 0 + total_samples = 0 + + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Training"): + images, labels = images.to(device), labels.to(device) + optimizer.zero_grad() + outputs = model(images) + loss = criterion(outputs, labels) + loss.backward() + optimizer.step() + + running_loss += loss.item() * images.size(0) + _, preds = torch.max(outputs, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + return epoch_loss, epoch_acc + +def validate_one_epoch_tta(model, dataloader, criterion, device): + """ + Versi validasi yang menggunakan Test-Time Augmentation (TTA). + Memprediksi gambar asli dan versi flip, lalu merata-ratakan hasilnya. 
+ """ + model.eval() + running_loss = 0.0 + correct_predictions = 0 + total_samples = 0 + all_preds, all_labels = [], [] + + with torch.no_grad(): + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Validating with TTA"): + images, labels = images.to(device), labels.to(device) + + # TTA: Prediksi gambar asli dan versi flip horizontal + outputs_original = model(images) + outputs_flipped = model(torch.flip(images, dims=[3])) # flip horizontal + + # Rata-ratakan probabilitas hasil prediksi + outputs_avg = (torch.softmax(outputs_original, dim=1) + torch.softmax(outputs_flipped, dim=1)) / 2 + + # Hitung loss dari hasil rata-rata + loss = criterion(outputs_avg, labels) + + running_loss += loss.item() * images.size(0) + _, preds = torch.max(outputs_avg, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + all_preds.extend(preds.cpu().numpy()) + all_labels.extend(labels.cpu().numpy()) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + + print("\n--- Laporan Klasifikasi Validasi (dengan TTA) ---") + print(classification_report(all_labels, all_preds, target_names=[str(i) for i in range(NUM_CLASSES)], zero_division=0)) + + return epoch_loss, epoch_acc, all_labels, all_preds + +# --- 3. SCRIPT UTAMA --- +if __name__ == '__main__': + _, valid_loader, classes, train_dataset = get_dataloaders(DATA_DIR, BATCH_SIZE, IMAGE_SIZE, NUM_WORKERS) + + print("\n--- Menyiapkan Balanced Sampler untuk Training ---") + class_counts = np.bincount(train_dataset.targets) + class_weights = 1. 
/ torch.tensor(class_counts, dtype=torch.float) + sample_weights = class_weights[train_dataset.targets] + sampler = WeightedRandomSampler(weights=sample_weights, num_samples=len(sample_weights), replacement=True) + train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, sampler=sampler, num_workers=NUM_WORKERS) + + model = create_model(num_classes=NUM_CLASSES, image_size=IMAGE_SIZE).to(DEVICE) + + # Menggunakan CrossEntropyLoss dengan Label Smoothing untuk regularisasi + criterion = nn.CrossEntropyLoss(label_smoothing=0.1).to(DEVICE) + print("Menggunakan CrossEntropyLoss dengan Label Smoothing (0.1).") + + # --- TAHAP 1: FREEZE BACKBONE, LATIH HEAD --- + print("\n--- TAHAP 1: Melatih Classifier Head ---") + for param in model.parameters(): + param.requires_grad = False + for param in model.head.parameters(): + param.requires_grad = True + optimizer_head = optim.AdamW(model.head.parameters(), lr=LEARNING_RATE_HEAD, weight_decay=WEIGHT_DECAY) + for epoch in range(5): + print(f"Epoch Head {epoch+1}/5") + train_one_epoch(model, train_loader, optimizer_head, criterion, DEVICE) + validate_one_epoch_tta(model, valid_loader, criterion, DEVICE) + + # --- TAHAP 2: UNFREEZE & FINE-TUNE SELURUH MODEL --- + print("\n--- TAHAP 2: Fine-tuning Seluruh Model ---") + for param in model.parameters(): + param.requires_grad = True + optimizer_finetune = optim.AdamW(model.parameters(), lr=LEARNING_RATE_FINETUNE, weight_decay=WEIGHT_DECAY) + scheduler = ReduceLROnPlateau(optimizer_finetune, mode='min', factor=0.2, patience=5) + print("Menggunakan scheduler ReduceLROnPlateau.") + + history = {'train_loss': [], 'train_acc': [], 'valid_loss': [], 'valid_acc': []} + best_valid_acc = 0.0 + best_labels, best_preds = None, None + + for epoch in range(EPOCHS): + print(f"Epoch {epoch+1}/{EPOCHS}") + + train_loss, train_acc = train_one_epoch(model, train_loader, optimizer_finetune, criterion, DEVICE) + valid_loss, valid_acc, valid_labels, valid_preds = validate_one_epoch_tta(model, 
valid_loader, criterion, DEVICE) + + scheduler.step(valid_loss) + + print(f" Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}") + print(f" Valid Loss: {valid_loss:.4f}, Valid Acc: {valid_acc:.4f}") + + history['train_loss'].append(train_loss) + history['train_acc'].append(train_acc) + history['valid_loss'].append(valid_loss) + history['valid_acc'].append(valid_acc) + + if valid_acc > best_valid_acc: + print(f"Validasi akurasi meningkat dari {best_valid_acc:.4f} ke {valid_acc:.4f}. Menyimpan model...") + save_model(epoch, model, optimizer_finetune, criterion, f"{OUTPUT_DIR}/{MODEL_NAME}") + best_valid_acc = valid_acc + best_labels = valid_labels + best_preds = valid_preds + + save_plots(history['train_acc'], history['valid_acc'], history['train_loss'], history['valid_loss'], OUTPUT_DIR) + + if best_labels and best_preds: + save_confusion_matrix(best_labels, best_preds, classes, f"{OUTPUT_DIR}/confusion_matrix.png") + + print("\n--- Selesai ---") + print(f"Model terbaik disimpan di {OUTPUT_DIR}/{MODEL_NAME}") \ No newline at end of file diff --git a/.history/src/train_20250929123152.py b/.history/src/train_20250929123152.py new file mode 100644 index 0000000000000000000000000000000000000000..f34d418e5f10e5c8ae7f5372a632160854c1eed7 --- /dev/null +++ b/.history/src/train_20250929123152.py @@ -0,0 +1,167 @@ +# src/train.py (dengan Test-Time Augmentation) + +import torch +import torch.nn as nn +import torch.optim as optim +from tqdm import tqdm +import numpy as np +from sklearn.metrics import classification_report +from torch.utils.data import DataLoader, WeightedRandomSampler +from torch.optim.lr_scheduler import ReduceLROnPlateau + +# Impor dari file lain dalam proyek +from model import create_model +from dataset import get_dataloaders +from utils import save_model, save_plots, save_confusion_matrix + +# --- 1. 
KONFIGURASI & HYPERPARAMETERS --- +# Ini adalah parameter final yang bisa Anda gunakan +DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu' +DATA_DIR = 'data' +OUTPUT_DIR = 'outputs/new_model' +IMAGE_SIZE = 224 # Coba naikkan ke 384 jika VRAM cukup, jangan lupa turunkan BATCH_SIZE +BATCH_SIZE = 4 # Ukuran batch yang aman untuk VRAM 4GB +NUM_WORKERS = 4 # Optimal untuk CPU 6-core +EPOCHS = 50 +LEARNING_RATE_HEAD = 1e-3 +LEARNING_RATE_FINETUNE = 3e-5 +WEIGHT_DECAY = 0.05 # Regularisasi yang sedikit lebih kuat +MODEL_NAME = 'best_model_final_TTA.pth' # Nama file baru untuk hasil final +NUM_CLASSES = 4 + +# --- 2. FUNGSI TRAINING & VALIDASI --- +def train_one_epoch(model, dataloader, optimizer, criterion, device): + model.train() + running_loss = 0.0 + correct_predictions = 0 + total_samples = 0 + + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Training"): + images, labels = images.to(device), labels.to(device) + optimizer.zero_grad() + outputs = model(images) + loss = criterion(outputs, labels) + loss.backward() + optimizer.step() + + running_loss += loss.item() * images.size(0) + _, preds = torch.max(outputs, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + return epoch_loss, epoch_acc + +def validate_one_epoch_tta(model, dataloader, criterion, device): + """ + Versi validasi yang menggunakan Test-Time Augmentation (TTA). + Memprediksi gambar asli dan versi flip, lalu merata-ratakan hasilnya. 
+ """ + model.eval() + running_loss = 0.0 + correct_predictions = 0 + total_samples = 0 + all_preds, all_labels = [], [] + + with torch.no_grad(): + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Validating with TTA"): + images, labels = images.to(device), labels.to(device) + + # TTA: Prediksi gambar asli dan versi flip horizontal + outputs_original = model(images) + outputs_flipped = model(torch.flip(images, dims=[3])) # flip horizontal + + # Rata-ratakan probabilitas hasil prediksi + outputs_avg = (torch.softmax(outputs_original, dim=1) + torch.softmax(outputs_flipped, dim=1)) / 2 + + # Hitung loss dari hasil rata-rata + loss = criterion(outputs_avg, labels) + + running_loss += loss.item() * images.size(0) + _, preds = torch.max(outputs_avg, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + all_preds.extend(preds.cpu().numpy()) + all_labels.extend(labels.cpu().numpy()) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + + print("\n--- Laporan Klasifikasi Validasi (dengan TTA) ---") + print(classification_report(all_labels, all_preds, target_names=[str(i) for i in range(NUM_CLASSES)], zero_division=0)) + + return epoch_loss, epoch_acc, all_labels, all_preds + +# --- 3. SCRIPT UTAMA --- +if __name__ == '__main__': + _, valid_loader, classes, train_dataset = get_dataloaders(DATA_DIR, BATCH_SIZE, IMAGE_SIZE, NUM_WORKERS) + + print("\n--- Menyiapkan Balanced Sampler untuk Training ---") + class_counts = np.bincount(train_dataset.targets) + class_weights = 1. 
/ torch.tensor(class_counts, dtype=torch.float) + sample_weights = class_weights[train_dataset.targets] + sampler = WeightedRandomSampler(weights=sample_weights, num_samples=len(sample_weights), replacement=True) + train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, sampler=sampler, num_workers=NUM_WORKERS) + + model = create_model(num_classes=NUM_CLASSES, image_size=IMAGE_SIZE).to(DEVICE) + + # Menggunakan CrossEntropyLoss dengan Label Smoothing untuk regularisasi + criterion = nn.CrossEntropyLoss(label_smoothing=0.1).to(DEVICE) + print("Menggunakan CrossEntropyLoss dengan Label Smoothing (0.1).") + + # --- TAHAP 1: FREEZE BACKBONE, LATIH HEAD --- + print("\n--- TAHAP 1: Melatih Classifier Head ---") + for param in model.parameters(): + param.requires_grad = False + for param in model.head.parameters(): + param.requires_grad = True + optimizer_head = optim.AdamW(model.head.parameters(), lr=LEARNING_RATE_HEAD, weight_decay=WEIGHT_DECAY) + for epoch in range(5): + print(f"Epoch Head {epoch+1}/5") + train_one_epoch(model, train_loader, optimizer_head, criterion, DEVICE) + validate_one_epoch_tta(model, valid_loader, criterion, DEVICE) + + # --- TAHAP 2: UNFREEZE & FINE-TUNE SELURUH MODEL --- + print("\n--- TAHAP 2: Fine-tuning Seluruh Model ---") + for param in model.parameters(): + param.requires_grad = True + optimizer_finetune = optim.AdamW(model.parameters(), lr=LEARNING_RATE_FINETUNE, weight_decay=WEIGHT_DECAY) + scheduler = ReduceLROnPlateau(optimizer_finetune, mode='min', factor=0.2, patience=5) + print("Menggunakan scheduler ReduceLROnPlateau.") + + history = {'train_loss': [], 'train_acc': [], 'valid_loss': [], 'valid_acc': []} + best_valid_acc = 0.0 + best_labels, best_preds = None, None + + for epoch in range(EPOCHS): + print(f"Epoch {epoch+1}/{EPOCHS}") + + train_loss, train_acc = train_one_epoch(model, train_loader, optimizer_finetune, criterion, DEVICE) + valid_loss, valid_acc, valid_labels, valid_preds = validate_one_epoch_tta(model, 
valid_loader, criterion, DEVICE) + + scheduler.step(valid_loss) + + print(f" Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}") + print(f" Valid Loss: {valid_loss:.4f}, Valid Acc: {valid_acc:.4f}") + + history['train_loss'].append(train_loss) + history['train_acc'].append(train_acc) + history['valid_loss'].append(valid_loss) + history['valid_acc'].append(valid_acc) + + if valid_acc > best_valid_acc: + print(f"Validasi akurasi meningkat dari {best_valid_acc:.4f} ke {valid_acc:.4f}. Menyimpan model...") + save_model(epoch, model, optimizer_finetune, criterion, f"{OUTPUT_DIR}/{MODEL_NAME}") + best_valid_acc = valid_acc + best_labels = valid_labels + best_preds = valid_preds + + save_plots(history['train_acc'], history['valid_acc'], history['train_loss'], history['valid_loss'], OUTPUT_DIR) + + if best_labels and best_preds: + save_confusion_matrix(best_labels, best_preds, classes, f"{OUTPUT_DIR}/confusion_matrix.png") + + print("\n--- Selesai ---") + print(f"Model terbaik disimpan di {OUTPUT_DIR}/{MODEL_NAME}") \ No newline at end of file diff --git a/.history/src/train_20250929123155.py b/.history/src/train_20250929123155.py new file mode 100644 index 0000000000000000000000000000000000000000..102b5c776a17c924477616dbd33597280ad568bc --- /dev/null +++ b/.history/src/train_20250929123155.py @@ -0,0 +1,167 @@ +# src/train.py (dengan Test-Time Augmentation) + +import torch +import torch.nn as nn +import torch.optim as optim +from tqdm import tqdm +import numpy as np +from sklearn.metrics import classification_report +from torch.utils.data import DataLoader, WeightedRandomSampler +from torch.optim.lr_scheduler import ReduceLROnPlateau + +# Impor dari file lain dalam proyek +from model import create_model +from dataset import get_dataloaders +from utils import save_model, save_plots, save_confusion_matrix + +# --- 1. 
KONFIGURASI & HYPERPARAMETERS --- +# Ini adalah parameter final yang bisa Anda gunakan +DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu' +DATA_DIR = 'data' +OUTPUT_DIR = 'outputs/new_model' +IMAGE_SIZE = 224 # Coba naikkan ke 384 jika VRAM cukup, jangan lupa turunkan BATCH_SIZE +BATCH_SIZE = 8 # Ukuran batch yang aman untuk VRAM 4GB +NUM_WORKERS = 4 # Optimal untuk CPU 6-core +EPOCHS = 50 +LEARNING_RATE_HEAD = 1e-3 +LEARNING_RATE_FINETUNE = 3e-5 +WEIGHT_DECAY = 0.05 # Regularisasi yang sedikit lebih kuat +MODEL_NAME = 'best_model_final_TTA.pth' # Nama file baru untuk hasil final +NUM_CLASSES = 4 + +# --- 2. FUNGSI TRAINING & VALIDASI --- +def train_one_epoch(model, dataloader, optimizer, criterion, device): + model.train() + running_loss = 0.0 + correct_predictions = 0 + total_samples = 0 + + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Training"): + images, labels = images.to(device), labels.to(device) + optimizer.zero_grad() + outputs = model(images) + loss = criterion(outputs, labels) + loss.backward() + optimizer.step() + + running_loss += loss.item() * images.size(0) + _, preds = torch.max(outputs, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + return epoch_loss, epoch_acc + +def validate_one_epoch_tta(model, dataloader, criterion, device): + """ + Versi validasi yang menggunakan Test-Time Augmentation (TTA). + Memprediksi gambar asli dan versi flip, lalu merata-ratakan hasilnya. 
+ """ + model.eval() + running_loss = 0.0 + correct_predictions = 0 + total_samples = 0 + all_preds, all_labels = [], [] + + with torch.no_grad(): + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Validating with TTA"): + images, labels = images.to(device), labels.to(device) + + # TTA: Prediksi gambar asli dan versi flip horizontal + outputs_original = model(images) + outputs_flipped = model(torch.flip(images, dims=[3])) # flip horizontal + + # Rata-ratakan probabilitas hasil prediksi + outputs_avg = (torch.softmax(outputs_original, dim=1) + torch.softmax(outputs_flipped, dim=1)) / 2 + + # Hitung loss dari hasil rata-rata + loss = criterion(outputs_avg, labels) + + running_loss += loss.item() * images.size(0) + _, preds = torch.max(outputs_avg, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + all_preds.extend(preds.cpu().numpy()) + all_labels.extend(labels.cpu().numpy()) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + + print("\n--- Laporan Klasifikasi Validasi (dengan TTA) ---") + print(classification_report(all_labels, all_preds, target_names=[str(i) for i in range(NUM_CLASSES)], zero_division=0)) + + return epoch_loss, epoch_acc, all_labels, all_preds + +# --- 3. SCRIPT UTAMA --- +if __name__ == '__main__': + _, valid_loader, classes, train_dataset = get_dataloaders(DATA_DIR, BATCH_SIZE, IMAGE_SIZE, NUM_WORKERS) + + print("\n--- Menyiapkan Balanced Sampler untuk Training ---") + class_counts = np.bincount(train_dataset.targets) + class_weights = 1. 
/ torch.tensor(class_counts, dtype=torch.float) + sample_weights = class_weights[train_dataset.targets] + sampler = WeightedRandomSampler(weights=sample_weights, num_samples=len(sample_weights), replacement=True) + train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, sampler=sampler, num_workers=NUM_WORKERS) + + model = create_model(num_classes=NUM_CLASSES, image_size=IMAGE_SIZE).to(DEVICE) + + # Menggunakan CrossEntropyLoss dengan Label Smoothing untuk regularisasi + criterion = nn.CrossEntropyLoss(label_smoothing=0.1).to(DEVICE) + print("Menggunakan CrossEntropyLoss dengan Label Smoothing (0.1).") + + # --- TAHAP 1: FREEZE BACKBONE, LATIH HEAD --- + print("\n--- TAHAP 1: Melatih Classifier Head ---") + for param in model.parameters(): + param.requires_grad = False + for param in model.head.parameters(): + param.requires_grad = True + optimizer_head = optim.AdamW(model.head.parameters(), lr=LEARNING_RATE_HEAD, weight_decay=WEIGHT_DECAY) + for epoch in range(5): + print(f"Epoch Head {epoch+1}/5") + train_one_epoch(model, train_loader, optimizer_head, criterion, DEVICE) + validate_one_epoch_tta(model, valid_loader, criterion, DEVICE) + + # --- TAHAP 2: UNFREEZE & FINE-TUNE SELURUH MODEL --- + print("\n--- TAHAP 2: Fine-tuning Seluruh Model ---") + for param in model.parameters(): + param.requires_grad = True + optimizer_finetune = optim.AdamW(model.parameters(), lr=LEARNING_RATE_FINETUNE, weight_decay=WEIGHT_DECAY) + scheduler = ReduceLROnPlateau(optimizer_finetune, mode='min', factor=0.2, patience=5) + print("Menggunakan scheduler ReduceLROnPlateau.") + + history = {'train_loss': [], 'train_acc': [], 'valid_loss': [], 'valid_acc': []} + best_valid_acc = 0.0 + best_labels, best_preds = None, None + + for epoch in range(EPOCHS): + print(f"Epoch {epoch+1}/{EPOCHS}") + + train_loss, train_acc = train_one_epoch(model, train_loader, optimizer_finetune, criterion, DEVICE) + valid_loss, valid_acc, valid_labels, valid_preds = validate_one_epoch_tta(model, 
valid_loader, criterion, DEVICE) + + scheduler.step(valid_loss) + + print(f" Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}") + print(f" Valid Loss: {valid_loss:.4f}, Valid Acc: {valid_acc:.4f}") + + history['train_loss'].append(train_loss) + history['train_acc'].append(train_acc) + history['valid_loss'].append(valid_loss) + history['valid_acc'].append(valid_acc) + + if valid_acc > best_valid_acc: + print(f"Validasi akurasi meningkat dari {best_valid_acc:.4f} ke {valid_acc:.4f}. Menyimpan model...") + save_model(epoch, model, optimizer_finetune, criterion, f"{OUTPUT_DIR}/{MODEL_NAME}") + best_valid_acc = valid_acc + best_labels = valid_labels + best_preds = valid_preds + + save_plots(history['train_acc'], history['valid_acc'], history['train_loss'], history['valid_loss'], OUTPUT_DIR) + + if best_labels and best_preds: + save_confusion_matrix(best_labels, best_preds, classes, f"{OUTPUT_DIR}/confusion_matrix.png") + + print("\n--- Selesai ---") + print(f"Model terbaik disimpan di {OUTPUT_DIR}/{MODEL_NAME}") \ No newline at end of file diff --git a/.history/src/train_20250930184959.py b/.history/src/train_20250930184959.py new file mode 100644 index 0000000000000000000000000000000000000000..a7b6717c1c16286b50c99a9446de7e3c6915b354 --- /dev/null +++ b/.history/src/train_20250930184959.py @@ -0,0 +1,167 @@ +# src/train.py (dengan Test-Time Augmentation) + +import torch +import torch.nn as nn +import torch.optim as optim +from tqdm import tqdm +import numpy as np +from sklearn.metrics import classification_report +from torch.utils.data import DataLoader, WeightedRandomSampler +from torch.optim.lr_scheduler import ReduceLROnPlateau + +# Impor dari file lain dalam proyek +from model import create_model +from dataset import get_dataloaders +from utils import save_model, save_plots, save_confusion_matrix + +# --- 1. 
KONFIGURASI & HYPERPARAMETERS --- +# Ini adalah parameter final yang bisa Anda gunakan +DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu' +DATA_DIR = 'data' +OUTPUT_DIR = 'outputs/new_model' +IMAGE_SIZE = 384 # Coba naikkan ke 384 jika VRAM cukup, jangan lupa turunkan BATCH_SIZE +BATCH_SIZE = 8 # Ukuran batch yang aman untuk VRAM 4GB +NUM_WORKERS = 4 # Optimal untuk CPU 6-core +EPOCHS = 50 +LEARNING_RATE_HEAD = 1e-3 +LEARNING_RATE_FINETUNE = 3e-5 +WEIGHT_DECAY = 0.05 # Regularisasi yang sedikit lebih kuat +MODEL_NAME = 'best_model_final_TTA.pth' # Nama file baru untuk hasil final +NUM_CLASSES = 4 + +# --- 2. FUNGSI TRAINING & VALIDASI --- +def train_one_epoch(model, dataloader, optimizer, criterion, device): + model.train() + running_loss = 0.0 + correct_predictions = 0 + total_samples = 0 + + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Training"): + images, labels = images.to(device), labels.to(device) + optimizer.zero_grad() + outputs = model(images) + loss = criterion(outputs, labels) + loss.backward() + optimizer.step() + + running_loss += loss.item() * images.size(0) + _, preds = torch.max(outputs, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + return epoch_loss, epoch_acc + +def validate_one_epoch_tta(model, dataloader, criterion, device): + """ + Versi validasi yang menggunakan Test-Time Augmentation (TTA). + Memprediksi gambar asli dan versi flip, lalu merata-ratakan hasilnya. 
+ """ + model.eval() + running_loss = 0.0 + correct_predictions = 0 + total_samples = 0 + all_preds, all_labels = [], [] + + with torch.no_grad(): + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Validating with TTA"): + images, labels = images.to(device), labels.to(device) + + # TTA: Prediksi gambar asli dan versi flip horizontal + outputs_original = model(images) + outputs_flipped = model(torch.flip(images, dims=[3])) # flip horizontal + + # Rata-ratakan probabilitas hasil prediksi + outputs_avg = (torch.softmax(outputs_original, dim=1) + torch.softmax(outputs_flipped, dim=1)) / 2 + + # Hitung loss dari hasil rata-rata + loss = criterion(outputs_avg, labels) + + running_loss += loss.item() * images.size(0) + _, preds = torch.max(outputs_avg, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + all_preds.extend(preds.cpu().numpy()) + all_labels.extend(labels.cpu().numpy()) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + + print("\n--- Laporan Klasifikasi Validasi (dengan TTA) ---") + print(classification_report(all_labels, all_preds, target_names=[str(i) for i in range(NUM_CLASSES)], zero_division=0)) + + return epoch_loss, epoch_acc, all_labels, all_preds + +# --- 3. SCRIPT UTAMA --- +if __name__ == '__main__': + _, valid_loader, classes, train_dataset = get_dataloaders(DATA_DIR, BATCH_SIZE, IMAGE_SIZE, NUM_WORKERS) + + print("\n--- Menyiapkan Balanced Sampler untuk Training ---") + class_counts = np.bincount(train_dataset.targets) + class_weights = 1. 
/ torch.tensor(class_counts, dtype=torch.float) + sample_weights = class_weights[train_dataset.targets] + sampler = WeightedRandomSampler(weights=sample_weights, num_samples=len(sample_weights), replacement=True) + train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, sampler=sampler, num_workers=NUM_WORKERS) + + model = create_model(num_classes=NUM_CLASSES, image_size=IMAGE_SIZE).to(DEVICE) + + # Menggunakan CrossEntropyLoss dengan Label Smoothing untuk regularisasi + criterion = nn.CrossEntropyLoss(label_smoothing=0.1).to(DEVICE) + print("Menggunakan CrossEntropyLoss dengan Label Smoothing (0.1).") + + # --- TAHAP 1: FREEZE BACKBONE, LATIH HEAD --- + print("\n--- TAHAP 1: Melatih Classifier Head ---") + for param in model.parameters(): + param.requires_grad = False + for param in model.head.parameters(): + param.requires_grad = True + optimizer_head = optim.AdamW(model.head.parameters(), lr=LEARNING_RATE_HEAD, weight_decay=WEIGHT_DECAY) + for epoch in range(5): + print(f"Epoch Head {epoch+1}/5") + train_one_epoch(model, train_loader, optimizer_head, criterion, DEVICE) + validate_one_epoch_tta(model, valid_loader, criterion, DEVICE) + + # --- TAHAP 2: UNFREEZE & FINE-TUNE SELURUH MODEL --- + print("\n--- TAHAP 2: Fine-tuning Seluruh Model ---") + for param in model.parameters(): + param.requires_grad = True + optimizer_finetune = optim.AdamW(model.parameters(), lr=LEARNING_RATE_FINETUNE, weight_decay=WEIGHT_DECAY) + scheduler = ReduceLROnPlateau(optimizer_finetune, mode='min', factor=0.2, patience=5) + print("Menggunakan scheduler ReduceLROnPlateau.") + + history = {'train_loss': [], 'train_acc': [], 'valid_loss': [], 'valid_acc': []} + best_valid_acc = 0.0 + best_labels, best_preds = None, None + + for epoch in range(EPOCHS): + print(f"Epoch {epoch+1}/{EPOCHS}") + + train_loss, train_acc = train_one_epoch(model, train_loader, optimizer_finetune, criterion, DEVICE) + valid_loss, valid_acc, valid_labels, valid_preds = validate_one_epoch_tta(model, 
valid_loader, criterion, DEVICE) + + scheduler.step(valid_loss) + + print(f" Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}") + print(f" Valid Loss: {valid_loss:.4f}, Valid Acc: {valid_acc:.4f}") + + history['train_loss'].append(train_loss) + history['train_acc'].append(train_acc) + history['valid_loss'].append(valid_loss) + history['valid_acc'].append(valid_acc) + + if valid_acc > best_valid_acc: + print(f"Validasi akurasi meningkat dari {best_valid_acc:.4f} ke {valid_acc:.4f}. Menyimpan model...") + save_model(epoch, model, optimizer_finetune, criterion, f"{OUTPUT_DIR}/{MODEL_NAME}") + best_valid_acc = valid_acc + best_labels = valid_labels + best_preds = valid_preds + + save_plots(history['train_acc'], history['valid_acc'], history['train_loss'], history['valid_loss'], OUTPUT_DIR) + + if best_labels and best_preds: + save_confusion_matrix(best_labels, best_preds, classes, f"{OUTPUT_DIR}/confusion_matrix.png") + + print("\n--- Selesai ---") + print(f"Model terbaik disimpan di {OUTPUT_DIR}/{MODEL_NAME}") \ No newline at end of file diff --git a/.history/src/train_20250930190205.py b/.history/src/train_20250930190205.py new file mode 100644 index 0000000000000000000000000000000000000000..83be7fa4a3499a7a2525bb892e502d17ee34815b --- /dev/null +++ b/.history/src/train_20250930190205.py @@ -0,0 +1,348 @@ +# src/train.py (dengan Test-Time Augmentation) + +import torch +import torch.nn as nn +import torch.optim as optim +from tqdm import tqdm +import numpy as np +from sklearn.metrics import classification_report +from torch.utils.data import DataLoader, WeightedRandomSampler +from torch.optim.lr_scheduler import ReduceLROnPlateau + +# Impor dari file lain dalam proyek +from model import create_model +from dataset import get_dataloaders +from utils import save_model, save_plots, save_confusion_matrix + +# --- 1. 
KONFIGURASI & HYPERPARAMETERS --- +# Ini adalah parameter final yang bisa Anda gunakan +DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu' +DATA_DIR = 'data' +OUTPUT_DIR = 'outputs/new_model' +IMAGE_SIZE = 384 # Coba naikkan ke 384 jika VRAM cukup, jangan lupa turunkan BATCH_SIZE +BATCH_SIZE = 8 # Ukuran batch yang aman untuk VRAM 4GB +NUM_WORKERS = 4 # Optimal untuk CPU 6-core +EPOCHS = 50 +LEARNING_RATE_HEAD = 1e-3 +LEARNING_RATE_FINETUNE = 3e-5 +WEIGHT_DECAY = 0.05 # Regularisasi yang sedikit lebih kuat +MODEL_NAME = 'best_model_final_TTA.pth' # Nama file baru untuk hasil final +NUM_CLASSES = 4 + +# --- 2. FUNGSI TRAINING & VALIDASI --- +def train_one_epoch(model, dataloader, optimizer, criterion, device): + model.train() + running_loss = 0.0 + correct_predictions = 0 + total_samples = 0 + + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Training"): + images, labels = images.to(device), labels.to(device) + optimizer.zero_grad() + outputs = model(images) + loss = criterion(outputs, labels) + loss.backward() + optimizer.step() + + running_loss += loss.item() * images.size(0) + _, preds = torch.max(outputs, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + return epoch_loss, epoch_acc + +def validate_one_epoch_tta(model, dataloader, criterion, device): + """ + Versi validasi yang menggunakan Test-Time Augmentation (TTA). + Memprediksi gambar asli dan versi flip, lalu merata-ratakan hasilnya. 
+ """ + model.eval() + running_loss = 0.0 + correct_predictions = 0 + total_samples = 0 + all_preds, all_labels = [], [] + + with torch.no_grad(): + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Validating with TTA"): + images, labels = images.to(device), labels.to(device) + + # TTA: Prediksi gambar asli dan versi flip horizontal + outputs_original = model(images) + outputs_flipped = model(torch.flip(images, dims=[3])) # flip horizontal + + # Rata-ratakan probabilitas hasil prediksi + outputs_avg = (torch.softmax(outputs_original, dim=1) + torch.softmax(outputs_flipped, dim=1)) / 2 + + # Hitung loss dari hasil rat# src/train.py (dengan TTA + FocalLoss + EarlyStopping) + +import torch +import torch.nn as nn +import torch.optim as optim +from tqdm import tqdm +import numpy as np +from sklearn.metrics import classification_report +from torch.utils.data import DataLoader, WeightedRandomSampler +from torch.optim.lr_scheduler import ReduceLROnPlateau +import copy + +# Impor dari file lain dalam proyek +from model import create_model +from dataset import get_dataloaders +from utils import save_model, save_plots, save_confusion_matrix, FocalLoss + +# --- 1. KONFIGURASI & HYPERPARAMETERS --- +DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu' +DATA_DIR = 'data' +OUTPUT_DIR = 'outputs/new_model' +IMAGE_SIZE = 384 +BATCH_SIZE = 8 +NUM_WORKERS = 4 +EPOCHS = 50 +LEARNING_RATE_HEAD = 1e-3 +LEARNING_RATE_FINETUNE = 3e-5 +WEIGHT_DECAY = 0.05 +MODEL_NAME = 'best_model_final_TTA_Focal.pth' +NUM_CLASSES = 4 +PATIENCE = 7 # untuk EarlyStopping + +# --- 2. 
TRAINING & VALIDASI --- +def train_one_epoch(model, dataloader, optimizer, criterion, device): + model.train() + running_loss, correct_predictions, total_samples = 0.0, 0, 0 + + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Training"): + images, labels = images.to(device), labels.to(device) + optimizer.zero_grad() + outputs = model(images) + loss = criterion(outputs, labels) + loss.backward() + optimizer.step() + + running_loss += loss.item() * images.size(0) + _, preds = torch.max(outputs, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + return epoch_loss, epoch_acc + + +def validate_one_epoch_tta(model, dataloader, criterion, device): + """ + Validasi dengan Test-Time Augmentation (TTA). + Gunakan prediksi rata-rata dari beberapa augmentasi sederhana. + """ + model.eval() + running_loss, correct_predictions, total_samples = 0.0, 0, 0 + all_preds, all_labels = [], [] + + with torch.no_grad(): + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Validating with TTA"): + images, labels = images.to(device), labels.to(device) + + # TTA: original, flip H, flip V, rotate 90 + outputs_list = [] + outputs_list.append(model(images)) # original + outputs_list.append(model(torch.flip(images, dims=[3]))) # horizontal flip + outputs_list.append(model(torch.flip(images, dims=[2]))) # vertical flip + outputs_list.append(model(torch.rot90(images, k=1, dims=[2, 3]))) # rotate 90° + + # Rata-ratakan probabilitas + outputs_avg = torch.mean( + torch.stack([torch.softmax(out, dim=1) for out in outputs_list]), dim=0 + ) + + loss = criterion(outputs_avg, labels) + running_loss += loss.item() * images.size(0) + + _, preds = torch.max(outputs_avg, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + all_preds.extend(preds.cpu().numpy()) + 
all_labels.extend(labels.cpu().numpy()) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + + print("\n--- Laporan Klasifikasi Validasi (dengan TTA) ---") + print(classification_report(all_labels, all_preds, target_names=[str(i) for i in range(NUM_CLASSES)], zero_division=0)) + + return epoch_loss, epoch_acc, all_labels, all_preds + +# --- 3. SCRIPT UTAMA --- +if __name__ == '__main__': + _, valid_loader, classes, train_dataset = get_dataloaders(DATA_DIR, BATCH_SIZE, IMAGE_SIZE, NUM_WORKERS) + + print("\n--- Menyiapkan Balanced Sampler untuk Training ---") + class_counts = np.bincount(train_dataset.targets) + class_weights = 1. / torch.tensor(class_counts, dtype=torch.float) + sample_weights = class_weights[train_dataset.targets] + sampler = WeightedRandomSampler(weights=sample_weights, num_samples=len(sample_weights), replacement=True) + train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, sampler=sampler, num_workers=NUM_WORKERS) + + model = create_model(num_classes=NUM_CLASSES, image_size=IMAGE_SIZE).to(DEVICE) + + # Gunakan FocalLoss (lebih tahan imbalance) + criterion = FocalLoss(alpha=1, gamma=2).to(DEVICE) + print("Menggunakan FocalLoss (alpha=1, gamma=2).") + + # --- TAHAP 1: FREEZE BACKBONE, LATIH HEAD --- + print("\n--- TAHAP 1: Melatih Classifier Head ---") + for param in model.parameters(): + param.requires_grad = False + for param in model.head.parameters(): + param.requires_grad = True + optimizer_head = optim.AdamW(model.head.parameters(), lr=LEARNING_RATE_HEAD, weight_decay=WEIGHT_DECAY) + for epoch in range(5): + print(f"Epoch Head {epoch+1}/5") + train_one_epoch(model, train_loader, optimizer_head, criterion, DEVICE) + validate_one_epoch_tta(model, valid_loader, criterion, DEVICE) + + # --- TAHAP 2: UNFREEZE & FINE-TUNE SELURUH MODEL --- + print("\n--- TAHAP 2: Fine-tuning Seluruh Model ---") + for param in model.parameters(): + param.requires_grad = True + 
optimizer_finetune = optim.AdamW(model.parameters(), lr=LEARNING_RATE_FINETUNE, weight_decay=WEIGHT_DECAY) + scheduler = ReduceLROnPlateau(optimizer_finetune, mode='min', factor=0.2, patience=5) + print("Menggunakan scheduler ReduceLROnPlateau + EarlyStopping.") + + history = {'train_loss': [], 'train_acc': [], 'valid_loss': [], 'valid_acc': []} + best_valid_acc = 0.0 + best_labels, best_preds = None, None + best_model_wts = copy.deepcopy(model.state_dict()) + patience_counter = 0 + + for epoch in range(EPOCHS): + print(f"Epoch {epoch+1}/{EPOCHS}") + + train_loss, train_acc = train_one_epoch(model, train_loader, optimizer_finetune, criterion, DEVICE) + valid_loss, valid_acc, valid_labels, valid_preds = validate_one_epoch_tta(model, valid_loader, criterion, DEVICE) + + scheduler.step(valid_loss) + + print(f" Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}") + print(f" Valid Loss: {valid_loss:.4f}, Valid Acc: {valid_acc:.4f}") + + history['train_loss'].append(train_loss) + history['train_acc'].append(train_acc) + history['valid_loss'].append(valid_loss) + history['valid_acc'].append(valid_acc) + + # EarlyStopping check + if valid_acc > best_valid_acc: + print(f"Validasi akurasi meningkat dari {best_valid_acc:.4f} ke {valid_acc:.4f}. 
Menyimpan model...") + save_model(epoch, model, optimizer_finetune, criterion, f"{OUTPUT_DIR}/{MODEL_NAME}") + best_valid_acc = valid_acc + best_labels = valid_labels + best_preds = valid_preds + best_model_wts = copy.deepcopy(model.state_dict()) + patience_counter = 0 + else: + patience_counter += 1 + if patience_counter >= PATIENCE: + print(f"Early stopping di epoch {epoch+1} karena tidak ada peningkatan validasi selama {PATIENCE} epoch.") + break + + # Load best model + model.load_state_dict(best_model_wts) + save_plots(history['train_acc'], history['valid_acc'], history['train_loss'], history['valid_loss'], OUTPUT_DIR) + + if best_labels and best_preds: + save_confusion_matrix(best_labels, best_preds, classes, f"{OUTPUT_DIR}/confusion_matrix.png") + + print("\n--- Selesai ---") + print(f"Model terbaik disimpan di {OUTPUT_DIR}/{MODEL_NAME}") +a-rata + loss = criterion(outputs_avg, labels) + + running_loss += loss.item() * images.size(0) + _, preds = torch.max(outputs_avg, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + all_preds.extend(preds.cpu().numpy()) + all_labels.extend(labels.cpu().numpy()) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + + print("\n--- Laporan Klasifikasi Validasi (dengan TTA) ---") + print(classification_report(all_labels, all_preds, target_names=[str(i) for i in range(NUM_CLASSES)], zero_division=0)) + + return epoch_loss, epoch_acc, all_labels, all_preds + +# --- 3. SCRIPT UTAMA --- +if __name__ == '__main__': + _, valid_loader, classes, train_dataset = get_dataloaders(DATA_DIR, BATCH_SIZE, IMAGE_SIZE, NUM_WORKERS) + + print("\n--- Menyiapkan Balanced Sampler untuk Training ---") + class_counts = np.bincount(train_dataset.targets) + class_weights = 1. 
/ torch.tensor(class_counts, dtype=torch.float) + sample_weights = class_weights[train_dataset.targets] + sampler = WeightedRandomSampler(weights=sample_weights, num_samples=len(sample_weights), replacement=True) + train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, sampler=sampler, num_workers=NUM_WORKERS) + + model = create_model(num_classes=NUM_CLASSES, image_size=IMAGE_SIZE).to(DEVICE) + + # Menggunakan CrossEntropyLoss dengan Label Smoothing untuk regularisasi + criterion = nn.CrossEntropyLoss(label_smoothing=0.1).to(DEVICE) + print("Menggunakan CrossEntropyLoss dengan Label Smoothing (0.1).") + + # --- TAHAP 1: FREEZE BACKBONE, LATIH HEAD --- + print("\n--- TAHAP 1: Melatih Classifier Head ---") + for param in model.parameters(): + param.requires_grad = False + for param in model.head.parameters(): + param.requires_grad = True + optimizer_head = optim.AdamW(model.head.parameters(), lr=LEARNING_RATE_HEAD, weight_decay=WEIGHT_DECAY) + for epoch in range(5): + print(f"Epoch Head {epoch+1}/5") + train_one_epoch(model, train_loader, optimizer_head, criterion, DEVICE) + validate_one_epoch_tta(model, valid_loader, criterion, DEVICE) + + # --- TAHAP 2: UNFREEZE & FINE-TUNE SELURUH MODEL --- + print("\n--- TAHAP 2: Fine-tuning Seluruh Model ---") + for param in model.parameters(): + param.requires_grad = True + optimizer_finetune = optim.AdamW(model.parameters(), lr=LEARNING_RATE_FINETUNE, weight_decay=WEIGHT_DECAY) + scheduler = ReduceLROnPlateau(optimizer_finetune, mode='min', factor=0.2, patience=5) + print("Menggunakan scheduler ReduceLROnPlateau.") + + history = {'train_loss': [], 'train_acc': [], 'valid_loss': [], 'valid_acc': []} + best_valid_acc = 0.0 + best_labels, best_preds = None, None + + for epoch in range(EPOCHS): + print(f"Epoch {epoch+1}/{EPOCHS}") + + train_loss, train_acc = train_one_epoch(model, train_loader, optimizer_finetune, criterion, DEVICE) + valid_loss, valid_acc, valid_labels, valid_preds = validate_one_epoch_tta(model, 
valid_loader, criterion, DEVICE) + + scheduler.step(valid_loss) + + print(f" Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}") + print(f" Valid Loss: {valid_loss:.4f}, Valid Acc: {valid_acc:.4f}") + + history['train_loss'].append(train_loss) + history['train_acc'].append(train_acc) + history['valid_loss'].append(valid_loss) + history['valid_acc'].append(valid_acc) + + if valid_acc > best_valid_acc: + print(f"Validasi akurasi meningkat dari {best_valid_acc:.4f} ke {valid_acc:.4f}. Menyimpan model...") + save_model(epoch, model, optimizer_finetune, criterion, f"{OUTPUT_DIR}/{MODEL_NAME}") + best_valid_acc = valid_acc + best_labels = valid_labels + best_preds = valid_preds + + save_plots(history['train_acc'], history['valid_acc'], history['train_loss'], history['valid_loss'], OUTPUT_DIR) + + if best_labels and best_preds: + save_confusion_matrix(best_labels, best_preds, classes, f"{OUTPUT_DIR}/confusion_matrix.png") + + print("\n--- Selesai ---") + print(f"Model terbaik disimpan di {OUTPUT_DIR}/{MODEL_NAME}") \ No newline at end of file diff --git a/.history/src/train_20250930190209.py b/.history/src/train_20250930190209.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/.history/src/train_20250930190217.py b/.history/src/train_20250930190217.py new file mode 100644 index 0000000000000000000000000000000000000000..11108accae88f1ad9bdeb53ecabb6fd7c6ffe49f --- /dev/null +++ b/.history/src/train_20250930190217.py @@ -0,0 +1,181 @@ +# src/train.py (dengan TTA + FocalLoss + EarlyStopping) + +import torch +import torch.nn as nn +import torch.optim as optim +from tqdm import tqdm +import numpy as np +from sklearn.metrics import classification_report +from torch.utils.data import DataLoader, WeightedRandomSampler +from torch.optim.lr_scheduler import ReduceLROnPlateau +import copy + +# Impor dari file lain dalam proyek +from model import create_model +from dataset import get_dataloaders +from 
utils import save_model, save_plots, save_confusion_matrix, FocalLoss + +# --- 1. KONFIGURASI & HYPERPARAMETERS --- +DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu' +DATA_DIR = 'data' +OUTPUT_DIR = 'outputs/new_model' +IMAGE_SIZE = 384 +BATCH_SIZE = 8 +NUM_WORKERS = 4 +EPOCHS = 50 +LEARNING_RATE_HEAD = 1e-3 +LEARNING_RATE_FINETUNE = 3e-5 +WEIGHT_DECAY = 0.05 +MODEL_NAME = 'best_model_final_TTA_Focal.pth' +NUM_CLASSES = 4 +PATIENCE = 7 # untuk EarlyStopping + +# --- 2. TRAINING & VALIDASI --- +def train_one_epoch(model, dataloader, optimizer, criterion, device): + model.train() + running_loss, correct_predictions, total_samples = 0.0, 0, 0 + + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Training"): + images, labels = images.to(device), labels.to(device) + optimizer.zero_grad() + outputs = model(images) + loss = criterion(outputs, labels) + loss.backward() + optimizer.step() + + running_loss += loss.item() * images.size(0) + _, preds = torch.max(outputs, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + return epoch_loss, epoch_acc + + +def validate_one_epoch_tta(model, dataloader, criterion, device): + """ + Validasi dengan Test-Time Augmentation (TTA). + Gunakan prediksi rata-rata dari beberapa augmentasi sederhana. 
+ """ + model.eval() + running_loss, correct_predictions, total_samples = 0.0, 0, 0 + all_preds, all_labels = [], [] + + with torch.no_grad(): + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Validating with TTA"): + images, labels = images.to(device), labels.to(device) + + # TTA: original, flip H, flip V, rotate 90 + outputs_list = [] + outputs_list.append(model(images)) # original + outputs_list.append(model(torch.flip(images, dims=[3]))) # horizontal flip + outputs_list.append(model(torch.flip(images, dims=[2]))) # vertical flip + outputs_list.append(model(torch.rot90(images, k=1, dims=[2, 3]))) # rotate 90° + + # Rata-ratakan probabilitas + outputs_avg = torch.mean( + torch.stack([torch.softmax(out, dim=1) for out in outputs_list]), dim=0 + ) + + loss = criterion(outputs_avg, labels) + running_loss += loss.item() * images.size(0) + + _, preds = torch.max(outputs_avg, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + all_preds.extend(preds.cpu().numpy()) + all_labels.extend(labels.cpu().numpy()) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + + print("\n--- Laporan Klasifikasi Validasi (dengan TTA) ---") + print(classification_report(all_labels, all_preds, target_names=[str(i) for i in range(NUM_CLASSES)], zero_division=0)) + + return epoch_loss, epoch_acc, all_labels, all_preds + +# --- 3. SCRIPT UTAMA --- +if __name__ == '__main__': + _, valid_loader, classes, train_dataset = get_dataloaders(DATA_DIR, BATCH_SIZE, IMAGE_SIZE, NUM_WORKERS) + + print("\n--- Menyiapkan Balanced Sampler untuk Training ---") + class_counts = np.bincount(train_dataset.targets) + class_weights = 1. 
/ torch.tensor(class_counts, dtype=torch.float) + sample_weights = class_weights[train_dataset.targets] + sampler = WeightedRandomSampler(weights=sample_weights, num_samples=len(sample_weights), replacement=True) + train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, sampler=sampler, num_workers=NUM_WORKERS) + + model = create_model(num_classes=NUM_CLASSES, image_size=IMAGE_SIZE).to(DEVICE) + + # Gunakan FocalLoss (lebih tahan imbalance) + criterion = FocalLoss(alpha=1, gamma=2).to(DEVICE) + print("Menggunakan FocalLoss (alpha=1, gamma=2).") + + # --- TAHAP 1: FREEZE BACKBONE, LATIH HEAD --- + print("\n--- TAHAP 1: Melatih Classifier Head ---") + for param in model.parameters(): + param.requires_grad = False + for param in model.head.parameters(): + param.requires_grad = True + optimizer_head = optim.AdamW(model.head.parameters(), lr=LEARNING_RATE_HEAD, weight_decay=WEIGHT_DECAY) + for epoch in range(5): + print(f"Epoch Head {epoch+1}/5") + train_one_epoch(model, train_loader, optimizer_head, criterion, DEVICE) + validate_one_epoch_tta(model, valid_loader, criterion, DEVICE) + + # --- TAHAP 2: UNFREEZE & FINE-TUNE SELURUH MODEL --- + print("\n--- TAHAP 2: Fine-tuning Seluruh Model ---") + for param in model.parameters(): + param.requires_grad = True + optimizer_finetune = optim.AdamW(model.parameters(), lr=LEARNING_RATE_FINETUNE, weight_decay=WEIGHT_DECAY) + scheduler = ReduceLROnPlateau(optimizer_finetune, mode='min', factor=0.2, patience=5) + print("Menggunakan scheduler ReduceLROnPlateau + EarlyStopping.") + + history = {'train_loss': [], 'train_acc': [], 'valid_loss': [], 'valid_acc': []} + best_valid_acc = 0.0 + best_labels, best_preds = None, None + best_model_wts = copy.deepcopy(model.state_dict()) + patience_counter = 0 + + for epoch in range(EPOCHS): + print(f"Epoch {epoch+1}/{EPOCHS}") + + train_loss, train_acc = train_one_epoch(model, train_loader, optimizer_finetune, criterion, DEVICE) + valid_loss, valid_acc, valid_labels, valid_preds = 
validate_one_epoch_tta(model, valid_loader, criterion, DEVICE) + + scheduler.step(valid_loss) + + print(f" Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}") + print(f" Valid Loss: {valid_loss:.4f}, Valid Acc: {valid_acc:.4f}") + + history['train_loss'].append(train_loss) + history['train_acc'].append(train_acc) + history['valid_loss'].append(valid_loss) + history['valid_acc'].append(valid_acc) + + # EarlyStopping check + if valid_acc > best_valid_acc: + print(f"Validasi akurasi meningkat dari {best_valid_acc:.4f} ke {valid_acc:.4f}. Menyimpan model...") + save_model(epoch, model, optimizer_finetune, criterion, f"{OUTPUT_DIR}/{MODEL_NAME}") + best_valid_acc = valid_acc + best_labels = valid_labels + best_preds = valid_preds + best_model_wts = copy.deepcopy(model.state_dict()) + patience_counter = 0 + else: + patience_counter += 1 + if patience_counter >= PATIENCE: + print(f"Early stopping di epoch {epoch+1} karena tidak ada peningkatan validasi selama {PATIENCE} epoch.") + break + + # Load best model + model.load_state_dict(best_model_wts) + save_plots(history['train_acc'], history['valid_acc'], history['train_loss'], history['valid_loss'], OUTPUT_DIR) + + if best_labels and best_preds: + save_confusion_matrix(best_labels, best_preds, classes, f"{OUTPUT_DIR}/confusion_matrix.png") + + print("\n--- Selesai ---") + print(f"Model terbaik disimpan di {OUTPUT_DIR}/{MODEL_NAME}") diff --git a/.history/src/train_20250930190331.py b/.history/src/train_20250930190331.py new file mode 100644 index 0000000000000000000000000000000000000000..88f96be0e08ccb632e59354e0c464ee823694e7d --- /dev/null +++ b/.history/src/train_20250930190331.py @@ -0,0 +1,181 @@ +# src/train.py (dengan TTA + FocalLoss + EarlyStopping) + +import torch +import torch.nn as nn +import torch.optim as optim +from tqdm import tqdm +import numpy as np +from sklearn.metrics import classification_report +from torch.utils.data import DataLoader, WeightedRandomSampler +from torch.optim.lr_scheduler import 
ReduceLROnPlateau +import copy + +# Impor dari file lain dalam proyek +from model import create_model +from dataset import get_dataloaders +from utils import save_model, save_plots, save_confusion_matrix, FocalLoss + +# --- 1. KONFIGURASI & HYPERPARAMETERS --- +DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu' +DATA_DIR = 'data' +OUTPUT_DIR = 'outputs/new_model' +IMAGE_SIZE = 224 +BATCH_SIZE = 8 +NUM_WORKERS = 4 +EPOCHS = 50 +LEARNING_RATE_HEAD = 1e-3 +LEARNING_RATE_FINETUNE = 3e-5 +WEIGHT_DECAY = 0.05 +MODEL_NAME = 'best_model_final_TTA_Focal.pth' +NUM_CLASSES = 4 +PATIENCE = 7 # untuk EarlyStopping + +# --- 2. TRAINING & VALIDASI --- +def train_one_epoch(model, dataloader, optimizer, criterion, device): + model.train() + running_loss, correct_predictions, total_samples = 0.0, 0, 0 + + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Training"): + images, labels = images.to(device), labels.to(device) + optimizer.zero_grad() + outputs = model(images) + loss = criterion(outputs, labels) + loss.backward() + optimizer.step() + + running_loss += loss.item() * images.size(0) + _, preds = torch.max(outputs, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + return epoch_loss, epoch_acc + + +def validate_one_epoch_tta(model, dataloader, criterion, device): + """ + Validasi dengan Test-Time Augmentation (TTA). + Gunakan prediksi rata-rata dari beberapa augmentasi sederhana. 
+ """ + model.eval() + running_loss, correct_predictions, total_samples = 0.0, 0, 0 + all_preds, all_labels = [], [] + + with torch.no_grad(): + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Validating with TTA"): + images, labels = images.to(device), labels.to(device) + + # TTA: original, flip H, flip V, rotate 90 + outputs_list = [] + outputs_list.append(model(images)) # original + outputs_list.append(model(torch.flip(images, dims=[3]))) # horizontal flip + outputs_list.append(model(torch.flip(images, dims=[2]))) # vertical flip + outputs_list.append(model(torch.rot90(images, k=1, dims=[2, 3]))) # rotate 90° + + # Rata-ratakan probabilitas + outputs_avg = torch.mean( + torch.stack([torch.softmax(out, dim=1) for out in outputs_list]), dim=0 + ) + + loss = criterion(outputs_avg, labels) + running_loss += loss.item() * images.size(0) + + _, preds = torch.max(outputs_avg, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + all_preds.extend(preds.cpu().numpy()) + all_labels.extend(labels.cpu().numpy()) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + + print("\n--- Laporan Klasifikasi Validasi (dengan TTA) ---") + print(classification_report(all_labels, all_preds, target_names=[str(i) for i in range(NUM_CLASSES)], zero_division=0)) + + return epoch_loss, epoch_acc, all_labels, all_preds + +# --- 3. SCRIPT UTAMA --- +if __name__ == '__main__': + _, valid_loader, classes, train_dataset = get_dataloaders(DATA_DIR, BATCH_SIZE, IMAGE_SIZE, NUM_WORKERS) + + print("\n--- Menyiapkan Balanced Sampler untuk Training ---") + class_counts = np.bincount(train_dataset.targets) + class_weights = 1. 
/ torch.tensor(class_counts, dtype=torch.float) + sample_weights = class_weights[train_dataset.targets] + sampler = WeightedRandomSampler(weights=sample_weights, num_samples=len(sample_weights), replacement=True) + train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, sampler=sampler, num_workers=NUM_WORKERS) + + model = create_model(num_classes=NUM_CLASSES, image_size=IMAGE_SIZE).to(DEVICE) + + # Gunakan FocalLoss (lebih tahan imbalance) + criterion = FocalLoss(alpha=1, gamma=2).to(DEVICE) + print("Menggunakan FocalLoss (alpha=1, gamma=2).") + + # --- TAHAP 1: FREEZE BACKBONE, LATIH HEAD --- + print("\n--- TAHAP 1: Melatih Classifier Head ---") + for param in model.parameters(): + param.requires_grad = False + for param in model.head.parameters(): + param.requires_grad = True + optimizer_head = optim.AdamW(model.head.parameters(), lr=LEARNING_RATE_HEAD, weight_decay=WEIGHT_DECAY) + for epoch in range(5): + print(f"Epoch Head {epoch+1}/5") + train_one_epoch(model, train_loader, optimizer_head, criterion, DEVICE) + validate_one_epoch_tta(model, valid_loader, criterion, DEVICE) + + # --- TAHAP 2: UNFREEZE & FINE-TUNE SELURUH MODEL --- + print("\n--- TAHAP 2: Fine-tuning Seluruh Model ---") + for param in model.parameters(): + param.requires_grad = True + optimizer_finetune = optim.AdamW(model.parameters(), lr=LEARNING_RATE_FINETUNE, weight_decay=WEIGHT_DECAY) + scheduler = ReduceLROnPlateau(optimizer_finetune, mode='min', factor=0.2, patience=5) + print("Menggunakan scheduler ReduceLROnPlateau + EarlyStopping.") + + history = {'train_loss': [], 'train_acc': [], 'valid_loss': [], 'valid_acc': []} + best_valid_acc = 0.0 + best_labels, best_preds = None, None + best_model_wts = copy.deepcopy(model.state_dict()) + patience_counter = 0 + + for epoch in range(EPOCHS): + print(f"Epoch {epoch+1}/{EPOCHS}") + + train_loss, train_acc = train_one_epoch(model, train_loader, optimizer_finetune, criterion, DEVICE) + valid_loss, valid_acc, valid_labels, valid_preds = 
validate_one_epoch_tta(model, valid_loader, criterion, DEVICE) + + scheduler.step(valid_loss) + + print(f" Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}") + print(f" Valid Loss: {valid_loss:.4f}, Valid Acc: {valid_acc:.4f}") + + history['train_loss'].append(train_loss) + history['train_acc'].append(train_acc) + history['valid_loss'].append(valid_loss) + history['valid_acc'].append(valid_acc) + + # EarlyStopping check + if valid_acc > best_valid_acc: + print(f"Validasi akurasi meningkat dari {best_valid_acc:.4f} ke {valid_acc:.4f}. Menyimpan model...") + save_model(epoch, model, optimizer_finetune, criterion, f"{OUTPUT_DIR}/{MODEL_NAME}") + best_valid_acc = valid_acc + best_labels = valid_labels + best_preds = valid_preds + best_model_wts = copy.deepcopy(model.state_dict()) + patience_counter = 0 + else: + patience_counter += 1 + if patience_counter >= PATIENCE: + print(f"Early stopping di epoch {epoch+1} karena tidak ada peningkatan validasi selama {PATIENCE} epoch.") + break + + # Load best model + model.load_state_dict(best_model_wts) + save_plots(history['train_acc'], history['valid_acc'], history['train_loss'], history['valid_loss'], OUTPUT_DIR) + + if best_labels and best_preds: + save_confusion_matrix(best_labels, best_preds, classes, f"{OUTPUT_DIR}/confusion_matrix.png") + + print("\n--- Selesai ---") + print(f"Model terbaik disimpan di {OUTPUT_DIR}/{MODEL_NAME}") diff --git a/.history/src/train_20251001110508.py b/.history/src/train_20251001110508.py new file mode 100644 index 0000000000000000000000000000000000000000..82297773899a2229bacdc0ac5820734555bef233 --- /dev/null +++ b/.history/src/train_20251001110508.py @@ -0,0 +1,181 @@ +# src/train.py (dengan TTA + FocalLoss + EarlyStopping) + +import torch +import torch.nn as nn +import torch.optim as optim +from tqdm import tqdm +import numpy as np +from sklearn.metrics import classification_report +from torch.utils.data import DataLoader, WeightedRandomSampler +from torch.optim.lr_scheduler import 
ReduceLROnPlateau +import copy + +# Impor dari file lain dalam proyek +from model import create_model +from dataset import get_dataloaders +from utils import save_model, save_plots, save_confusion_matrix, FocalLoss + +# --- 1. KONFIGURASI & HYPERPARAMETERS --- +DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu' +DATA_DIR = 'data' +OUTPUT_DIR = 'outputs/new_model2' +IMAGE_SIZE = 224 +BATCH_SIZE = 8 +NUM_WORKERS = 4 +EPOCHS = 50 +LEARNING_RATE_HEAD = 1e-3 +LEARNING_RATE_FINETUNE = 3e-5 +WEIGHT_DECAY = 0.05 +MODEL_NAME = 'best_model_final_TTA_Focal.pth' +NUM_CLASSES = 4 +PATIENCE = 7 # untuk EarlyStopping + +# --- 2. TRAINING & VALIDASI --- +def train_one_epoch(model, dataloader, optimizer, criterion, device): + model.train() + running_loss, correct_predictions, total_samples = 0.0, 0, 0 + + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Training"): + images, labels = images.to(device), labels.to(device) + optimizer.zero_grad() + outputs = model(images) + loss = criterion(outputs, labels) + loss.backward() + optimizer.step() + + running_loss += loss.item() * images.size(0) + _, preds = torch.max(outputs, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + return epoch_loss, epoch_acc + + +def validate_one_epoch_tta(model, dataloader, criterion, device): + """ + Validasi dengan Test-Time Augmentation (TTA). + Gunakan prediksi rata-rata dari beberapa augmentasi sederhana. 
+ """ + model.eval() + running_loss, correct_predictions, total_samples = 0.0, 0, 0 + all_preds, all_labels = [], [] + + with torch.no_grad(): + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Validating with TTA"): + images, labels = images.to(device), labels.to(device) + + # TTA: original, flip H, flip V, rotate 90 + outputs_list = [] + outputs_list.append(model(images)) # original + outputs_list.append(model(torch.flip(images, dims=[3]))) # horizontal flip + outputs_list.append(model(torch.flip(images, dims=[2]))) # vertical flip + outputs_list.append(model(torch.rot90(images, k=1, dims=[2, 3]))) # rotate 90° + + # Rata-ratakan probabilitas + outputs_avg = torch.mean( + torch.stack([torch.softmax(out, dim=1) for out in outputs_list]), dim=0 + ) + + loss = criterion(outputs_avg, labels) + running_loss += loss.item() * images.size(0) + + _, preds = torch.max(outputs_avg, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + all_preds.extend(preds.cpu().numpy()) + all_labels.extend(labels.cpu().numpy()) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + + print("\n--- Laporan Klasifikasi Validasi (dengan TTA) ---") + print(classification_report(all_labels, all_preds, target_names=[str(i) for i in range(NUM_CLASSES)], zero_division=0)) + + return epoch_loss, epoch_acc, all_labels, all_preds + +# --- 3. SCRIPT UTAMA --- +if __name__ == '__main__': + _, valid_loader, classes, train_dataset = get_dataloaders(DATA_DIR, BATCH_SIZE, IMAGE_SIZE, NUM_WORKERS) + + print("\n--- Menyiapkan Balanced Sampler untuk Training ---") + class_counts = np.bincount(train_dataset.targets) + class_weights = 1. 
/ torch.tensor(class_counts, dtype=torch.float) + sample_weights = class_weights[train_dataset.targets] + sampler = WeightedRandomSampler(weights=sample_weights, num_samples=len(sample_weights), replacement=True) + train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, sampler=sampler, num_workers=NUM_WORKERS) + + model = create_model(num_classes=NUM_CLASSES, image_size=IMAGE_SIZE).to(DEVICE) + + # Gunakan FocalLoss (lebih tahan imbalance) + criterion = FocalLoss(alpha=1, gamma=2).to(DEVICE) + print("Menggunakan FocalLoss (alpha=1, gamma=2).") + + # --- TAHAP 1: FREEZE BACKBONE, LATIH HEAD --- + print("\n--- TAHAP 1: Melatih Classifier Head ---") + for param in model.parameters(): + param.requires_grad = False + for param in model.head.parameters(): + param.requires_grad = True + optimizer_head = optim.AdamW(model.head.parameters(), lr=LEARNING_RATE_HEAD, weight_decay=WEIGHT_DECAY) + for epoch in range(5): + print(f"Epoch Head {epoch+1}/5") + train_one_epoch(model, train_loader, optimizer_head, criterion, DEVICE) + validate_one_epoch_tta(model, valid_loader, criterion, DEVICE) + + # --- TAHAP 2: UNFREEZE & FINE-TUNE SELURUH MODEL --- + print("\n--- TAHAP 2: Fine-tuning Seluruh Model ---") + for param in model.parameters(): + param.requires_grad = True + optimizer_finetune = optim.AdamW(model.parameters(), lr=LEARNING_RATE_FINETUNE, weight_decay=WEIGHT_DECAY) + scheduler = ReduceLROnPlateau(optimizer_finetune, mode='min', factor=0.2, patience=5) + print("Menggunakan scheduler ReduceLROnPlateau + EarlyStopping.") + + history = {'train_loss': [], 'train_acc': [], 'valid_loss': [], 'valid_acc': []} + best_valid_acc = 0.0 + best_labels, best_preds = None, None + best_model_wts = copy.deepcopy(model.state_dict()) + patience_counter = 0 + + for epoch in range(EPOCHS): + print(f"Epoch {epoch+1}/{EPOCHS}") + + train_loss, train_acc = train_one_epoch(model, train_loader, optimizer_finetune, criterion, DEVICE) + valid_loss, valid_acc, valid_labels, valid_preds = 
validate_one_epoch_tta(model, valid_loader, criterion, DEVICE) + + scheduler.step(valid_loss) + + print(f" Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}") + print(f" Valid Loss: {valid_loss:.4f}, Valid Acc: {valid_acc:.4f}") + + history['train_loss'].append(train_loss) + history['train_acc'].append(train_acc) + history['valid_loss'].append(valid_loss) + history['valid_acc'].append(valid_acc) + + # EarlyStopping check + if valid_acc > best_valid_acc: + print(f"Validasi akurasi meningkat dari {best_valid_acc:.4f} ke {valid_acc:.4f}. Menyimpan model...") + save_model(epoch, model, optimizer_finetune, criterion, f"{OUTPUT_DIR}/{MODEL_NAME}") + best_valid_acc = valid_acc + best_labels = valid_labels + best_preds = valid_preds + best_model_wts = copy.deepcopy(model.state_dict()) + patience_counter = 0 + else: + patience_counter += 1 + if patience_counter >= PATIENCE: + print(f"Early stopping di epoch {epoch+1} karena tidak ada peningkatan validasi selama {PATIENCE} epoch.") + break + + # Load best model + model.load_state_dict(best_model_wts) + save_plots(history['train_acc'], history['valid_acc'], history['train_loss'], history['valid_loss'], OUTPUT_DIR) + + if best_labels and best_preds: + save_confusion_matrix(best_labels, best_preds, classes, f"{OUTPUT_DIR}/confusion_matrix.png") + + print("\n--- Selesai ---") + print(f"Model terbaik disimpan di {OUTPUT_DIR}/{MODEL_NAME}") diff --git a/.history/src/train_20251001111557.py b/.history/src/train_20251001111557.py new file mode 100644 index 0000000000000000000000000000000000000000..b8fcc0ab4a064e26d11e350b4a4db7456a7b8ebd --- /dev/null +++ b/.history/src/train_20251001111557.py @@ -0,0 +1,181 @@ +# src/train.py (dengan TTA + FocalLoss + EarlyStopping) + +import torch +import torch.nn as nn +import torch.optim as optim +from tqdm import tqdm +import numpy as np +from sklearn.metrics import classification_report +from torch.utils.data import DataLoader, WeightedRandomSampler +from torch.optim.lr_scheduler import 
ReduceLROnPlateau +import copy + +# Impor dari file lain dalam proyek +from model import create_model +from dataset import get_dataloaders +from utils import save_model, save_plots, save_confusion_matrix, FocalLoss + +# --- 1. KONFIGURASI & HYPERPARAMETERS --- +DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu' +DATA_DIR = 'data' +OUTPUT_DIR = 'outputs/new_model2' +IMAGE_SIZE = 224 +BATCH_SIZE = 16 +NUM_WORKERS = 4 +EPOCHS = 50 +LEARNING_RATE_HEAD = 1e-3 +LEARNING_RATE_FINETUNE = 3e-5 +WEIGHT_DECAY = 0.05 +MODEL_NAME = 'best_model_final_TTA_Focal.pth' +NUM_CLASSES = 4 +PATIENCE = 7 # untuk EarlyStopping + +# --- 2. TRAINING & VALIDASI --- +def train_one_epoch(model, dataloader, optimizer, criterion, device): + model.train() + running_loss, correct_predictions, total_samples = 0.0, 0, 0 + + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Training"): + images, labels = images.to(device), labels.to(device) + optimizer.zero_grad() + outputs = model(images) + loss = criterion(outputs, labels) + loss.backward() + optimizer.step() + + running_loss += loss.item() * images.size(0) + _, preds = torch.max(outputs, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + return epoch_loss, epoch_acc + + +def validate_one_epoch_tta(model, dataloader, criterion, device): + """ + Validasi dengan Test-Time Augmentation (TTA). + Gunakan prediksi rata-rata dari beberapa augmentasi sederhana. 
+ """ + model.eval() + running_loss, correct_predictions, total_samples = 0.0, 0, 0 + all_preds, all_labels = [], [] + + with torch.no_grad(): + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Validating with TTA"): + images, labels = images.to(device), labels.to(device) + + # TTA: original, flip H, flip V, rotate 90 + outputs_list = [] + outputs_list.append(model(images)) # original + outputs_list.append(model(torch.flip(images, dims=[3]))) # horizontal flip + outputs_list.append(model(torch.flip(images, dims=[2]))) # vertical flip + outputs_list.append(model(torch.rot90(images, k=1, dims=[2, 3]))) # rotate 90° + + # Rata-ratakan probabilitas + outputs_avg = torch.mean( + torch.stack([torch.softmax(out, dim=1) for out in outputs_list]), dim=0 + ) + + loss = criterion(outputs_avg, labels) + running_loss += loss.item() * images.size(0) + + _, preds = torch.max(outputs_avg, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + all_preds.extend(preds.cpu().numpy()) + all_labels.extend(labels.cpu().numpy()) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + + print("\n--- Laporan Klasifikasi Validasi (dengan TTA) ---") + print(classification_report(all_labels, all_preds, target_names=[str(i) for i in range(NUM_CLASSES)], zero_division=0)) + + return epoch_loss, epoch_acc, all_labels, all_preds + +# --- 3. SCRIPT UTAMA --- +if __name__ == '__main__': + _, valid_loader, classes, train_dataset = get_dataloaders(DATA_DIR, BATCH_SIZE, IMAGE_SIZE, NUM_WORKERS) + + print("\n--- Menyiapkan Balanced Sampler untuk Training ---") + class_counts = np.bincount(train_dataset.targets) + class_weights = 1. 
/ torch.tensor(class_counts, dtype=torch.float) + sample_weights = class_weights[train_dataset.targets] + sampler = WeightedRandomSampler(weights=sample_weights, num_samples=len(sample_weights), replacement=True) + train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, sampler=sampler, num_workers=NUM_WORKERS) + + model = create_model(num_classes=NUM_CLASSES, image_size=IMAGE_SIZE).to(DEVICE) + + # Gunakan FocalLoss (lebih tahan imbalance) + criterion = FocalLoss(alpha=1, gamma=2).to(DEVICE) + print("Menggunakan FocalLoss (alpha=1, gamma=2).") + + # --- TAHAP 1: FREEZE BACKBONE, LATIH HEAD --- + print("\n--- TAHAP 1: Melatih Classifier Head ---") + for param in model.parameters(): + param.requires_grad = False + for param in model.head.parameters(): + param.requires_grad = True + optimizer_head = optim.AdamW(model.head.parameters(), lr=LEARNING_RATE_HEAD, weight_decay=WEIGHT_DECAY) + for epoch in range(5): + print(f"Epoch Head {epoch+1}/5") + train_one_epoch(model, train_loader, optimizer_head, criterion, DEVICE) + validate_one_epoch_tta(model, valid_loader, criterion, DEVICE) + + # --- TAHAP 2: UNFREEZE & FINE-TUNE SELURUH MODEL --- + print("\n--- TAHAP 2: Fine-tuning Seluruh Model ---") + for param in model.parameters(): + param.requires_grad = True + optimizer_finetune = optim.AdamW(model.parameters(), lr=LEARNING_RATE_FINETUNE, weight_decay=WEIGHT_DECAY) + scheduler = ReduceLROnPlateau(optimizer_finetune, mode='min', factor=0.2, patience=5) + print("Menggunakan scheduler ReduceLROnPlateau + EarlyStopping.") + + history = {'train_loss': [], 'train_acc': [], 'valid_loss': [], 'valid_acc': []} + best_valid_acc = 0.0 + best_labels, best_preds = None, None + best_model_wts = copy.deepcopy(model.state_dict()) + patience_counter = 0 + + for epoch in range(EPOCHS): + print(f"Epoch {epoch+1}/{EPOCHS}") + + train_loss, train_acc = train_one_epoch(model, train_loader, optimizer_finetune, criterion, DEVICE) + valid_loss, valid_acc, valid_labels, valid_preds = 
validate_one_epoch_tta(model, valid_loader, criterion, DEVICE) + + scheduler.step(valid_loss) + + print(f" Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}") + print(f" Valid Loss: {valid_loss:.4f}, Valid Acc: {valid_acc:.4f}") + + history['train_loss'].append(train_loss) + history['train_acc'].append(train_acc) + history['valid_loss'].append(valid_loss) + history['valid_acc'].append(valid_acc) + + # EarlyStopping check + if valid_acc > best_valid_acc: + print(f"Validasi akurasi meningkat dari {best_valid_acc:.4f} ke {valid_acc:.4f}. Menyimpan model...") + save_model(epoch, model, optimizer_finetune, criterion, f"{OUTPUT_DIR}/{MODEL_NAME}") + best_valid_acc = valid_acc + best_labels = valid_labels + best_preds = valid_preds + best_model_wts = copy.deepcopy(model.state_dict()) + patience_counter = 0 + else: + patience_counter += 1 + if patience_counter >= PATIENCE: + print(f"Early stopping di epoch {epoch+1} karena tidak ada peningkatan validasi selama {PATIENCE} epoch.") + break + + # Load best model + model.load_state_dict(best_model_wts) + save_plots(history['train_acc'], history['valid_acc'], history['train_loss'], history['valid_loss'], OUTPUT_DIR) + + if best_labels and best_preds: + save_confusion_matrix(best_labels, best_preds, classes, f"{OUTPUT_DIR}/confusion_matrix.png") + + print("\n--- Selesai ---") + print(f"Model terbaik disimpan di {OUTPUT_DIR}/{MODEL_NAME}") diff --git a/.history/src/train_20251001111734.py b/.history/src/train_20251001111734.py new file mode 100644 index 0000000000000000000000000000000000000000..20930f3ad0feca9f10dca867848309b3e0fdbab6 --- /dev/null +++ b/.history/src/train_20251001111734.py @@ -0,0 +1,180 @@ +# src/train.py (dengan TTA + FocalLoss + EarlyStopping) + +import torch +import torch.nn as nn +import torch.optim as optim +from tqdm import tqdm +import numpy as np +from sklearn.metrics import classification_report +from torch.utils.data import DataLoader, WeightedRandomSampler +from torch.optim.lr_scheduler import 
ReduceLROnPlateau +import copy + +# Impor dari file lain dalam proyek +from model import create_model +from dataset import get_dataloaders +from utils import save_model, save_plots, save_confusion_matrix, FocalLoss + +# --- 1. KONFIGURASI & HYPERPARAMETERS --- +DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu' +DATA_DIR = 'data' +OUTPUT_DIR = 'outputs/new_model2' +IMAGE_SIZE = 224 +BATCH_SIZE = 16 +NUM_WORKERS = 4 +EPOCHS = 50 +LEARNING_RATE_HEAD = 1e-3 +LEARNING_RATE_FINETUNE = 3e-5 +WEIGHT_DECAY = 0.05 +MODEL_NAME = 'best_model_final_TTA_Focal.pth' +NUM_CLASSES = 4 +PATIENCE = 7 # untuk EarlyStopping + +# --- 2. TRAINING & VALIDASI --- +def train_one_epoch(model, dataloader, optimizer, criterion, device): + model.train() + running_loss, correct_predictions, total_samples = 0.0, 0, 0 + + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Training"): + images, labels = images.to(device), labels.to(device) + optimizer.zero_grad() + outputs = model(images) + loss = criterion(outputs, labels) + loss.backward() + optimizer.step() + + running_loss += loss.item() * images.size(0) + _, preds = torch.max(outputs, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + return epoch_loss, epoch_acc + + +def validate_one_epoch_tta(model, dataloader, criterion, device): + """ + Validasi dengan Test-Time Augmentation (TTA). + Gunakan prediksi rata-rata dari beberapa augmentasi sederhana. 
+ """ + model.eval() + running_loss, correct_predictions, total_samples = 0.0, 0, 0 + all_preds, all_labels = [], [] + + with torch.no_grad(): + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Validating with TTA"): + images, labels = images.to(device), labels.to(device) + + # TTA: original, flip H, flip V, rotate 90 + outputs_list = [] + outputs_list.append(model(images)) # original + outputs_list.append(model(torch.flip(images, dims=[3]))) # horizontal flip + outputs_list.append(model(torch.flip(images, dims=[2]))) # vertical flip + outputs_list.append(model(torch.rot90(images, k=1, dims=[2, 3]))) # rotate 90° + + # Rata-ratakan probabilitas + outputs_avg = torch.mean( + torch.stack([torch.softmax(out, dim=1) for out in outputs_list]), dim=0 + ) + + loss = criterion(outputs_avg, labels) + running_loss += loss.item() * images.size(0) + + _, preds = torch.max(outputs_avg, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + all_preds.extend(preds.cpu().numpy()) + all_labels.extend(labels.cpu().numpy()) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + + print("\n--- Laporan Klasifikasi Validasi (dengan TTA) ---") + print(classification_report(all_labels, all_preds, target_names=[str(i) for i in range(NUM_CLASSES)], zero_division=0)) + + return epoch_loss, epoch_acc, all_labels, all_preds + +# --- 3. SCRIPT UTAMA --- +if __name__ == '__main__': + _, valid_loader, classes, train_dataset = get_dataloaders(DATA_DIR, BATCH_SIZE, IMAGE_SIZE, NUM_WORKERS) + + print("\n--- Menyiapkan Balanced Sampler untuk Training ---") + class_counts = np.bincount(train_dataset.targets) + class_weights = 1. 
/ torch.tensor(class_counts, dtype=torch.float) + sample_weights = class_weights[train_dataset.targets] + sampler = WeightedRandomSampler(weights=sample_weights, num_samples=len(sample_weights), replacement=True) + train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, sampler=sampler, num_workers=NUM_WORKERS) + + model = create_model(num_classes=NUM_CLASSES, image_size=IMAGE_SIZE).to(DEVICE) + + criterion = nn.CrossEntropyLoss(label_smoothing=0.1).to(DEVICE) + print("Menggunakan CrossEntropyLoss dengan Label Smoothing (0.1).") + + # --- TAHAP 1: FREEZE BACKBONE, LATIH HEAD --- + print("\n--- TAHAP 1: Melatih Classifier Head ---") + for param in model.parameters(): + param.requires_grad = False + for param in model.head.parameters(): + param.requires_grad = True + optimizer_head = optim.AdamW(model.head.parameters(), lr=LEARNING_RATE_HEAD, weight_decay=WEIGHT_DECAY) + for epoch in range(5): + print(f"Epoch Head {epoch+1}/5") + train_one_epoch(model, train_loader, optimizer_head, criterion, DEVICE) + validate_one_epoch_tta(model, valid_loader, criterion, DEVICE) + + # --- TAHAP 2: UNFREEZE & FINE-TUNE SELURUH MODEL --- + print("\n--- TAHAP 2: Fine-tuning Seluruh Model ---") + for param in model.parameters(): + param.requires_grad = True + optimizer_finetune = optim.AdamW(model.parameters(), lr=LEARNING_RATE_FINETUNE, weight_decay=WEIGHT_DECAY) + scheduler = ReduceLROnPlateau(optimizer_finetune, mode='min', factor=0.2, patience=5) + print("Menggunakan scheduler ReduceLROnPlateau + EarlyStopping.") + + history = {'train_loss': [], 'train_acc': [], 'valid_loss': [], 'valid_acc': []} + best_valid_acc = 0.0 + best_labels, best_preds = None, None + best_model_wts = copy.deepcopy(model.state_dict()) + patience_counter = 0 + + for epoch in range(EPOCHS): + print(f"Epoch {epoch+1}/{EPOCHS}") + + train_loss, train_acc = train_one_epoch(model, train_loader, optimizer_finetune, criterion, DEVICE) + valid_loss, valid_acc, valid_labels, valid_preds = 
validate_one_epoch_tta(model, valid_loader, criterion, DEVICE) + + scheduler.step(valid_loss) + + print(f" Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}") + print(f" Valid Loss: {valid_loss:.4f}, Valid Acc: {valid_acc:.4f}") + + history['train_loss'].append(train_loss) + history['train_acc'].append(train_acc) + history['valid_loss'].append(valid_loss) + history['valid_acc'].append(valid_acc) + + # EarlyStopping check + if valid_acc > best_valid_acc: + print(f"Validasi akurasi meningkat dari {best_valid_acc:.4f} ke {valid_acc:.4f}. Menyimpan model...") + save_model(epoch, model, optimizer_finetune, criterion, f"{OUTPUT_DIR}/{MODEL_NAME}") + best_valid_acc = valid_acc + best_labels = valid_labels + best_preds = valid_preds + best_model_wts = copy.deepcopy(model.state_dict()) + patience_counter = 0 + else: + patience_counter += 1 + if patience_counter >= PATIENCE: + print(f"Early stopping di epoch {epoch+1} karena tidak ada peningkatan validasi selama {PATIENCE} epoch.") + break + + # Load best model + model.load_state_dict(best_model_wts) + save_plots(history['train_acc'], history['valid_acc'], history['train_loss'], history['valid_loss'], OUTPUT_DIR) + + if best_labels and best_preds: + save_confusion_matrix(best_labels, best_preds, classes, f"{OUTPUT_DIR}/confusion_matrix.png") + + print("\n--- Selesai ---") + print(f"Model terbaik disimpan di {OUTPUT_DIR}/{MODEL_NAME}") diff --git a/.history/src/train_20251001111758.py b/.history/src/train_20251001111758.py new file mode 100644 index 0000000000000000000000000000000000000000..b180885b42ec10d7286fe0a18d2baeb422eee4de --- /dev/null +++ b/.history/src/train_20251001111758.py @@ -0,0 +1,180 @@ +# src/train.py (dengan TTA + FocalLoss + EarlyStopping) + +import torch +import torch.nn as nn +import torch.optim as optim +from tqdm import tqdm +import numpy as np +from sklearn.metrics import classification_report +from torch.utils.data import DataLoader, WeightedRandomSampler +from torch.optim.lr_scheduler import 
ReduceLROnPlateau +import copy + +# Impor dari file lain dalam proyek +from model import create_model +from dataset import get_dataloaders +from utils import save_model, save_plots, save_confusion_matrix, FocalLoss + +# --- 1. KONFIGURASI & HYPERPARAMETERS --- +DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu' +DATA_DIR = 'data' +OUTPUT_DIR = 'outputs/new_model2' +IMAGE_SIZE = 224 +BATCH_SIZE = 16 +NUM_WORKERS = 4 +EPOCHS = 50 +LEARNING_RATE_HEAD = 1e-3 +LEARNING_RATE_FINETUNE = 3e-5 +WEIGHT_DECAY = 0.05 +MODEL_NAME = 'best_model_final_TTA_Focal.pth' +NUM_CLASSES = 4 +PATIENCE = 7 # untuk EarlyStopping + +# --- 2. TRAINING & VALIDASI --- +def train_one_epoch(model, dataloader, optimizer, criterion, device): + model.train() + running_loss, correct_predictions, total_samples = 0.0, 0, 0 + + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Training"): + images, labels = images.to(device), labels.to(device) + optimizer.zero_grad() + outputs = model(images) + loss = criterion(outputs, labels) + loss.backward() + optimizer.step() + + running_loss += loss.item() * images.size(0) + _, preds = torch.max(outputs, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + return epoch_loss, epoch_acc + + +def validate_one_epoch_tta(model, dataloader, criterion, device): + """ + Validasi dengan Test-Time Augmentation (TTA). + Gunakan prediksi rata-rata dari beberapa augmentasi sederhana. 
+ """ + model.eval() + running_loss, correct_predictions, total_samples = 0.0, 0, 0 + all_preds, all_labels = [], [] + + with torch.no_grad(): + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Validating with TTA"): + images, labels = images.to(device), labels.to(device) + + # TTA: original, flip H, flip V, rotate 90 + outputs_list = [] + outputs_list.append(model(images)) # original + outputs_list.append(model(torch.flip(images, dims=[3]))) # horizontal flip + outputs_list.append(model(torch.flip(images, dims=[2]))) # vertical flip + outputs_list.append(model(torch.rot90(images, k=1, dims=[2, 3]))) # rotate 90° + + # Rata-ratakan probabilitas + outputs_avg = torch.mean( + torch.stack([torch.softmax(out, dim=1) for out in outputs_list]), dim=0 + ) + + loss = criterion(outputs_avg, labels) + running_loss += loss.item() * images.size(0) + + _, preds = torch.max(outputs_avg, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + all_preds.extend(preds.cpu().numpy()) + all_labels.extend(labels.cpu().numpy()) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + + print("\n--- Laporan Klasifikasi Validasi (dengan TTA) ---") + print(classification_report(all_labels, all_preds, target_names=[str(i) for i in range(NUM_CLASSES)], zero_division=0)) + + return epoch_loss, epoch_acc, all_labels, all_preds + +# --- 3. SCRIPT UTAMA --- +if __name__ == '__main__': + _, valid_loader, classes, train_dataset = get_dataloaders(DATA_DIR, BATCH_SIZE, IMAGE_SIZE, NUM_WORKERS) + + print("\n--- Menyiapkan Balanced Sampler untuk Training ---") + class_counts = np.bincount(train_dataset.targets) + class_weights = 1. 
/ torch.tensor(class_counts, dtype=torch.float) + sample_weights = class_weights[train_dataset.targets] + sampler = WeightedRandomSampler(weights=sample_weights, num_samples=len(sample_weights), replacement=True) + train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, sampler=sampler, num_workers=NUM_WORKERS) + + model = create_model(num_classes=NUM_CLASSES, image_size=IMAGE_SIZE).to(DEVICE) + + criterion = nn.CrossEntropyLoss(label_smoothing=0.1).to(DEVICE) + print("Menggunakan CrossEntropyLoss dengan Label Smoothing (0.1).") + + # --- TAHAP 1: FREEZE BACKBONE, LATIH HEAD --- + print("\n--- TAHAP 1: Melatih Classifier Head ---") + for param in model.parameters(): + param.requires_grad = False + for param in model.head.parameters(): + param.requires_grad = True + optimizer_head = optim.AdamW(model.head.parameters(), lr=LEARNING_RATE_HEAD, weight_decay=WEIGHT_DECAY) + for epoch in range(5): + print(f"Epoch Head {epoch+1}/5") + train_one_epoch(model, train_loader, optimizer_head, criterion, DEVICE) + validate_one_epoch_tta(model, valid_loader, criterion, DEVICE) + + # --- TAHAP 2: UNFREEZE & FINE-TUNE SELURUH MODEL --- + print("\n--- TAHAP 2: Fine-tuning Seluruh Model ---") + for param in model.parameters(): + param.requires_grad = True + optimizer_finetune = optim.AdamW(model.parameters(), lr=LEARNING_RATE_FINETUNE, weight_decay=WEIGHT_DECAY) + scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer_finetune, T_max=EPOCHS, eta_min=1e-7) + print("Menggunakan scheduler CosineAnnealingLR.") + + history = {'train_loss': [], 'train_acc': [], 'valid_loss': [], 'valid_acc': []} + best_valid_acc = 0.0 + best_labels, best_preds = None, None + best_model_wts = copy.deepcopy(model.state_dict()) + patience_counter = 0 + + for epoch in range(EPOCHS): + print(f"Epoch {epoch+1}/{EPOCHS}") + + train_loss, train_acc = train_one_epoch(model, train_loader, optimizer_finetune, criterion, DEVICE) + valid_loss, valid_acc, valid_labels, valid_preds = 
validate_one_epoch_tta(model, valid_loader, criterion, DEVICE) + + scheduler.step(valid_loss) + + print(f" Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}") + print(f" Valid Loss: {valid_loss:.4f}, Valid Acc: {valid_acc:.4f}") + + history['train_loss'].append(train_loss) + history['train_acc'].append(train_acc) + history['valid_loss'].append(valid_loss) + history['valid_acc'].append(valid_acc) + + # EarlyStopping check + if valid_acc > best_valid_acc: + print(f"Validasi akurasi meningkat dari {best_valid_acc:.4f} ke {valid_acc:.4f}. Menyimpan model...") + save_model(epoch, model, optimizer_finetune, criterion, f"{OUTPUT_DIR}/{MODEL_NAME}") + best_valid_acc = valid_acc + best_labels = valid_labels + best_preds = valid_preds + best_model_wts = copy.deepcopy(model.state_dict()) + patience_counter = 0 + else: + patience_counter += 1 + if patience_counter >= PATIENCE: + print(f"Early stopping di epoch {epoch+1} karena tidak ada peningkatan validasi selama {PATIENCE} epoch.") + break + + # Load best model + model.load_state_dict(best_model_wts) + save_plots(history['train_acc'], history['valid_acc'], history['train_loss'], history['valid_loss'], OUTPUT_DIR) + + if best_labels and best_preds: + save_confusion_matrix(best_labels, best_preds, classes, f"{OUTPUT_DIR}/confusion_matrix.png") + + print("\n--- Selesai ---") + print(f"Model terbaik disimpan di {OUTPUT_DIR}/{MODEL_NAME}") diff --git a/.history/src/train_20251001111820.py b/.history/src/train_20251001111820.py new file mode 100644 index 0000000000000000000000000000000000000000..5c48e5704077fad53b3bd4175174b63d9e25085b --- /dev/null +++ b/.history/src/train_20251001111820.py @@ -0,0 +1,180 @@ +# src/train.py (dengan TTA + FocalLoss + EarlyStopping) + +import torch +import torch.nn as nn +import torch.optim as optim +from tqdm import tqdm +import numpy as np +from sklearn.metrics import classification_report +from torch.utils.data import DataLoader, WeightedRandomSampler +from torch.optim.lr_scheduler import 
ReduceLROnPlateau +import copy + +# Impor dari file lain dalam proyek +from model import create_model +from dataset import get_dataloaders +from utils import save_model, save_plots, save_confusion_matrix, FocalLoss + +# --- 1. KONFIGURASI & HYPERPARAMETERS --- +DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu' +DATA_DIR = 'data' +OUTPUT_DIR = 'outputs/new_model2' +IMAGE_SIZE = 224 +BATCH_SIZE = 16 +NUM_WORKERS = 4 +EPOCHS = 50 +LEARNING_RATE_HEAD = 1e-3 +LEARNING_RATE_FINETUNE = 2e-5 +WEIGHT_DECAY = 0.05 +MODEL_NAME = 'best_model_final_TTA_Focal.pth' +NUM_CLASSES = 4 +PATIENCE = 7 # untuk EarlyStopping + +# --- 2. TRAINING & VALIDASI --- +def train_one_epoch(model, dataloader, optimizer, criterion, device): + model.train() + running_loss, correct_predictions, total_samples = 0.0, 0, 0 + + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Training"): + images, labels = images.to(device), labels.to(device) + optimizer.zero_grad() + outputs = model(images) + loss = criterion(outputs, labels) + loss.backward() + optimizer.step() + + running_loss += loss.item() * images.size(0) + _, preds = torch.max(outputs, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + return epoch_loss, epoch_acc + + +def validate_one_epoch_tta(model, dataloader, criterion, device): + """ + Validasi dengan Test-Time Augmentation (TTA). + Gunakan prediksi rata-rata dari beberapa augmentasi sederhana. 
+ """ + model.eval() + running_loss, correct_predictions, total_samples = 0.0, 0, 0 + all_preds, all_labels = [], [] + + with torch.no_grad(): + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Validating with TTA"): + images, labels = images.to(device), labels.to(device) + + # TTA: original, flip H, flip V, rotate 90 + outputs_list = [] + outputs_list.append(model(images)) # original + outputs_list.append(model(torch.flip(images, dims=[3]))) # horizontal flip + outputs_list.append(model(torch.flip(images, dims=[2]))) # vertical flip + outputs_list.append(model(torch.rot90(images, k=1, dims=[2, 3]))) # rotate 90° + + # Rata-ratakan probabilitas + outputs_avg = torch.mean( + torch.stack([torch.softmax(out, dim=1) for out in outputs_list]), dim=0 + ) + + loss = criterion(outputs_avg, labels) + running_loss += loss.item() * images.size(0) + + _, preds = torch.max(outputs_avg, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + all_preds.extend(preds.cpu().numpy()) + all_labels.extend(labels.cpu().numpy()) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + + print("\n--- Laporan Klasifikasi Validasi (dengan TTA) ---") + print(classification_report(all_labels, all_preds, target_names=[str(i) for i in range(NUM_CLASSES)], zero_division=0)) + + return epoch_loss, epoch_acc, all_labels, all_preds + +# --- 3. SCRIPT UTAMA --- +if __name__ == '__main__': + _, valid_loader, classes, train_dataset = get_dataloaders(DATA_DIR, BATCH_SIZE, IMAGE_SIZE, NUM_WORKERS) + + print("\n--- Menyiapkan Balanced Sampler untuk Training ---") + class_counts = np.bincount(train_dataset.targets) + class_weights = 1. 
/ torch.tensor(class_counts, dtype=torch.float) + sample_weights = class_weights[train_dataset.targets] + sampler = WeightedRandomSampler(weights=sample_weights, num_samples=len(sample_weights), replacement=True) + train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, sampler=sampler, num_workers=NUM_WORKERS) + + model = create_model(num_classes=NUM_CLASSES, image_size=IMAGE_SIZE).to(DEVICE) + + criterion = nn.CrossEntropyLoss(label_smoothing=0.1).to(DEVICE) + print("Menggunakan CrossEntropyLoss dengan Label Smoothing (0.1).") + + # --- TAHAP 1: FREEZE BACKBONE, LATIH HEAD --- + print("\n--- TAHAP 1: Melatih Classifier Head ---") + for param in model.parameters(): + param.requires_grad = False + for param in model.head.parameters(): + param.requires_grad = True + optimizer_head = optim.AdamW(model.head.parameters(), lr=LEARNING_RATE_HEAD, weight_decay=WEIGHT_DECAY) + for epoch in range(5): + print(f"Epoch Head {epoch+1}/5") + train_one_epoch(model, train_loader, optimizer_head, criterion, DEVICE) + validate_one_epoch_tta(model, valid_loader, criterion, DEVICE) + + # --- TAHAP 2: UNFREEZE & FINE-TUNE SELURUH MODEL --- + print("\n--- TAHAP 2: Fine-tuning Seluruh Model ---") + for param in model.parameters(): + param.requires_grad = True + optimizer_finetune = optim.AdamW(model.parameters(), lr=LEARNING_RATE_FINETUNE, weight_decay=WEIGHT_DECAY) + scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer_finetune, T_max=EPOCHS, eta_min=1e-7) + print("Menggunakan scheduler CosineAnnealingLR.") + + history = {'train_loss': [], 'train_acc': [], 'valid_loss': [], 'valid_acc': []} + best_valid_acc = 0.0 + best_labels, best_preds = None, None + best_model_wts = copy.deepcopy(model.state_dict()) + patience_counter = 0 + + for epoch in range(EPOCHS): + print(f"Epoch {epoch+1}/{EPOCHS}") + + train_loss, train_acc = train_one_epoch(model, train_loader, optimizer_finetune, criterion, DEVICE) + valid_loss, valid_acc, valid_labels, valid_preds = 
validate_one_epoch_tta(model, valid_loader, criterion, DEVICE) + + scheduler.step(valid_loss) + + print(f" Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}") + print(f" Valid Loss: {valid_loss:.4f}, Valid Acc: {valid_acc:.4f}") + + history['train_loss'].append(train_loss) + history['train_acc'].append(train_acc) + history['valid_loss'].append(valid_loss) + history['valid_acc'].append(valid_acc) + + # EarlyStopping check + if valid_acc > best_valid_acc: + print(f"Validasi akurasi meningkat dari {best_valid_acc:.4f} ke {valid_acc:.4f}. Menyimpan model...") + save_model(epoch, model, optimizer_finetune, criterion, f"{OUTPUT_DIR}/{MODEL_NAME}") + best_valid_acc = valid_acc + best_labels = valid_labels + best_preds = valid_preds + best_model_wts = copy.deepcopy(model.state_dict()) + patience_counter = 0 + else: + patience_counter += 1 + if patience_counter >= PATIENCE: + print(f"Early stopping di epoch {epoch+1} karena tidak ada peningkatan validasi selama {PATIENCE} epoch.") + break + + # Load best model + model.load_state_dict(best_model_wts) + save_plots(history['train_acc'], history['valid_acc'], history['train_loss'], history['valid_loss'], OUTPUT_DIR) + + if best_labels and best_preds: + save_confusion_matrix(best_labels, best_preds, classes, f"{OUTPUT_DIR}/confusion_matrix.png") + + print("\n--- Selesai ---") + print(f"Model terbaik disimpan di {OUTPUT_DIR}/{MODEL_NAME}") diff --git a/.history/src/train_20251001111824.py b/.history/src/train_20251001111824.py new file mode 100644 index 0000000000000000000000000000000000000000..e742fa8872091fa1ec451ce7ee0c4a34a405abdd --- /dev/null +++ b/.history/src/train_20251001111824.py @@ -0,0 +1,180 @@ +# src/train.py (dengan TTA + FocalLoss + EarlyStopping) + +import torch +import torch.nn as nn +import torch.optim as optim +from tqdm import tqdm +import numpy as np +from sklearn.metrics import classification_report +from torch.utils.data import DataLoader, WeightedRandomSampler +from torch.optim.lr_scheduler import 
ReduceLROnPlateau +import copy + +# Impor dari file lain dalam proyek +from model import create_model +from dataset import get_dataloaders +from utils import save_model, save_plots, save_confusion_matrix, FocalLoss + +# --- 1. KONFIGURASI & HYPERPARAMETERS --- +DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu' +DATA_DIR = 'data' +OUTPUT_DIR = 'outputs/new_model2' +IMAGE_SIZE = 224 +BATCH_SIZE = 16 +NUM_WORKERS = 4 +EPOCHS = 75 +LEARNING_RATE_HEAD = 1e-3 +LEARNING_RATE_FINETUNE = 2e-5 +WEIGHT_DECAY = 0.05 +MODEL_NAME = 'best_model_final_TTA_Focal.pth' +NUM_CLASSES = 4 +PATIENCE = 7 # untuk EarlyStopping + +# --- 2. TRAINING & VALIDASI --- +def train_one_epoch(model, dataloader, optimizer, criterion, device): + model.train() + running_loss, correct_predictions, total_samples = 0.0, 0, 0 + + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Training"): + images, labels = images.to(device), labels.to(device) + optimizer.zero_grad() + outputs = model(images) + loss = criterion(outputs, labels) + loss.backward() + optimizer.step() + + running_loss += loss.item() * images.size(0) + _, preds = torch.max(outputs, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + return epoch_loss, epoch_acc + + +def validate_one_epoch_tta(model, dataloader, criterion, device): + """ + Validasi dengan Test-Time Augmentation (TTA). + Gunakan prediksi rata-rata dari beberapa augmentasi sederhana. 
+ """ + model.eval() + running_loss, correct_predictions, total_samples = 0.0, 0, 0 + all_preds, all_labels = [], [] + + with torch.no_grad(): + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Validating with TTA"): + images, labels = images.to(device), labels.to(device) + + # TTA: original, flip H, flip V, rotate 90 + outputs_list = [] + outputs_list.append(model(images)) # original + outputs_list.append(model(torch.flip(images, dims=[3]))) # horizontal flip + outputs_list.append(model(torch.flip(images, dims=[2]))) # vertical flip + outputs_list.append(model(torch.rot90(images, k=1, dims=[2, 3]))) # rotate 90° + + # Rata-ratakan probabilitas + outputs_avg = torch.mean( + torch.stack([torch.softmax(out, dim=1) for out in outputs_list]), dim=0 + ) + + loss = criterion(outputs_avg, labels) + running_loss += loss.item() * images.size(0) + + _, preds = torch.max(outputs_avg, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + all_preds.extend(preds.cpu().numpy()) + all_labels.extend(labels.cpu().numpy()) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + + print("\n--- Laporan Klasifikasi Validasi (dengan TTA) ---") + print(classification_report(all_labels, all_preds, target_names=[str(i) for i in range(NUM_CLASSES)], zero_division=0)) + + return epoch_loss, epoch_acc, all_labels, all_preds + +# --- 3. SCRIPT UTAMA --- +if __name__ == '__main__': + _, valid_loader, classes, train_dataset = get_dataloaders(DATA_DIR, BATCH_SIZE, IMAGE_SIZE, NUM_WORKERS) + + print("\n--- Menyiapkan Balanced Sampler untuk Training ---") + class_counts = np.bincount(train_dataset.targets) + class_weights = 1. 
/ torch.tensor(class_counts, dtype=torch.float) + sample_weights = class_weights[train_dataset.targets] + sampler = WeightedRandomSampler(weights=sample_weights, num_samples=len(sample_weights), replacement=True) + train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, sampler=sampler, num_workers=NUM_WORKERS) + + model = create_model(num_classes=NUM_CLASSES, image_size=IMAGE_SIZE).to(DEVICE) + + criterion = nn.CrossEntropyLoss(label_smoothing=0.1).to(DEVICE) + print("Menggunakan CrossEntropyLoss dengan Label Smoothing (0.1).") + + # --- TAHAP 1: FREEZE BACKBONE, LATIH HEAD --- + print("\n--- TAHAP 1: Melatih Classifier Head ---") + for param in model.parameters(): + param.requires_grad = False + for param in model.head.parameters(): + param.requires_grad = True + optimizer_head = optim.AdamW(model.head.parameters(), lr=LEARNING_RATE_HEAD, weight_decay=WEIGHT_DECAY) + for epoch in range(5): + print(f"Epoch Head {epoch+1}/5") + train_one_epoch(model, train_loader, optimizer_head, criterion, DEVICE) + validate_one_epoch_tta(model, valid_loader, criterion, DEVICE) + + # --- TAHAP 2: UNFREEZE & FINE-TUNE SELURUH MODEL --- + print("\n--- TAHAP 2: Fine-tuning Seluruh Model ---") + for param in model.parameters(): + param.requires_grad = True + optimizer_finetune = optim.AdamW(model.parameters(), lr=LEARNING_RATE_FINETUNE, weight_decay=WEIGHT_DECAY) + scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer_finetune, T_max=EPOCHS, eta_min=1e-7) + print("Menggunakan scheduler CosineAnnealingLR.") + + history = {'train_loss': [], 'train_acc': [], 'valid_loss': [], 'valid_acc': []} + best_valid_acc = 0.0 + best_labels, best_preds = None, None + best_model_wts = copy.deepcopy(model.state_dict()) + patience_counter = 0 + + for epoch in range(EPOCHS): + print(f"Epoch {epoch+1}/{EPOCHS}") + + train_loss, train_acc = train_one_epoch(model, train_loader, optimizer_finetune, criterion, DEVICE) + valid_loss, valid_acc, valid_labels, valid_preds = 
validate_one_epoch_tta(model, valid_loader, criterion, DEVICE) + + scheduler.step(valid_loss) + + print(f" Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}") + print(f" Valid Loss: {valid_loss:.4f}, Valid Acc: {valid_acc:.4f}") + + history['train_loss'].append(train_loss) + history['train_acc'].append(train_acc) + history['valid_loss'].append(valid_loss) + history['valid_acc'].append(valid_acc) + + # EarlyStopping check + if valid_acc > best_valid_acc: + print(f"Validasi akurasi meningkat dari {best_valid_acc:.4f} ke {valid_acc:.4f}. Menyimpan model...") + save_model(epoch, model, optimizer_finetune, criterion, f"{OUTPUT_DIR}/{MODEL_NAME}") + best_valid_acc = valid_acc + best_labels = valid_labels + best_preds = valid_preds + best_model_wts = copy.deepcopy(model.state_dict()) + patience_counter = 0 + else: + patience_counter += 1 + if patience_counter >= PATIENCE: + print(f"Early stopping di epoch {epoch+1} karena tidak ada peningkatan validasi selama {PATIENCE} epoch.") + break + + # Load best model + model.load_state_dict(best_model_wts) + save_plots(history['train_acc'], history['valid_acc'], history['train_loss'], history['valid_loss'], OUTPUT_DIR) + + if best_labels and best_preds: + save_confusion_matrix(best_labels, best_preds, classes, f"{OUTPUT_DIR}/confusion_matrix.png") + + print("\n--- Selesai ---") + print(f"Model terbaik disimpan di {OUTPUT_DIR}/{MODEL_NAME}") diff --git a/.history/src/train_20251002093258.py b/.history/src/train_20251002093258.py new file mode 100644 index 0000000000000000000000000000000000000000..5c48e5704077fad53b3bd4175174b63d9e25085b --- /dev/null +++ b/.history/src/train_20251002093258.py @@ -0,0 +1,180 @@ +# src/train.py (dengan TTA + FocalLoss + EarlyStopping) + +import torch +import torch.nn as nn +import torch.optim as optim +from tqdm import tqdm +import numpy as np +from sklearn.metrics import classification_report +from torch.utils.data import DataLoader, WeightedRandomSampler +from torch.optim.lr_scheduler import 
ReduceLROnPlateau +import copy + +# Impor dari file lain dalam proyek +from model import create_model +from dataset import get_dataloaders +from utils import save_model, save_plots, save_confusion_matrix, FocalLoss + +# --- 1. KONFIGURASI & HYPERPARAMETERS --- +DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu' +DATA_DIR = 'data' +OUTPUT_DIR = 'outputs/new_model2' +IMAGE_SIZE = 224 +BATCH_SIZE = 16 +NUM_WORKERS = 4 +EPOCHS = 50 +LEARNING_RATE_HEAD = 1e-3 +LEARNING_RATE_FINETUNE = 2e-5 +WEIGHT_DECAY = 0.05 +MODEL_NAME = 'best_model_final_TTA_Focal.pth' +NUM_CLASSES = 4 +PATIENCE = 7 # untuk EarlyStopping + +# --- 2. TRAINING & VALIDASI --- +def train_one_epoch(model, dataloader, optimizer, criterion, device): + model.train() + running_loss, correct_predictions, total_samples = 0.0, 0, 0 + + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Training"): + images, labels = images.to(device), labels.to(device) + optimizer.zero_grad() + outputs = model(images) + loss = criterion(outputs, labels) + loss.backward() + optimizer.step() + + running_loss += loss.item() * images.size(0) + _, preds = torch.max(outputs, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + return epoch_loss, epoch_acc + + +def validate_one_epoch_tta(model, dataloader, criterion, device): + """ + Validasi dengan Test-Time Augmentation (TTA). + Gunakan prediksi rata-rata dari beberapa augmentasi sederhana. 
+ """ + model.eval() + running_loss, correct_predictions, total_samples = 0.0, 0, 0 + all_preds, all_labels = [], [] + + with torch.no_grad(): + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Validating with TTA"): + images, labels = images.to(device), labels.to(device) + + # TTA: original, flip H, flip V, rotate 90 + outputs_list = [] + outputs_list.append(model(images)) # original + outputs_list.append(model(torch.flip(images, dims=[3]))) # horizontal flip + outputs_list.append(model(torch.flip(images, dims=[2]))) # vertical flip + outputs_list.append(model(torch.rot90(images, k=1, dims=[2, 3]))) # rotate 90° + + # Rata-ratakan probabilitas + outputs_avg = torch.mean( + torch.stack([torch.softmax(out, dim=1) for out in outputs_list]), dim=0 + ) + + loss = criterion(outputs_avg, labels) + running_loss += loss.item() * images.size(0) + + _, preds = torch.max(outputs_avg, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + all_preds.extend(preds.cpu().numpy()) + all_labels.extend(labels.cpu().numpy()) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + + print("\n--- Laporan Klasifikasi Validasi (dengan TTA) ---") + print(classification_report(all_labels, all_preds, target_names=[str(i) for i in range(NUM_CLASSES)], zero_division=0)) + + return epoch_loss, epoch_acc, all_labels, all_preds + +# --- 3. SCRIPT UTAMA --- +if __name__ == '__main__': + _, valid_loader, classes, train_dataset = get_dataloaders(DATA_DIR, BATCH_SIZE, IMAGE_SIZE, NUM_WORKERS) + + print("\n--- Menyiapkan Balanced Sampler untuk Training ---") + class_counts = np.bincount(train_dataset.targets) + class_weights = 1. 
/ torch.tensor(class_counts, dtype=torch.float) + sample_weights = class_weights[train_dataset.targets] + sampler = WeightedRandomSampler(weights=sample_weights, num_samples=len(sample_weights), replacement=True) + train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, sampler=sampler, num_workers=NUM_WORKERS) + + model = create_model(num_classes=NUM_CLASSES, image_size=IMAGE_SIZE).to(DEVICE) + + criterion = nn.CrossEntropyLoss(label_smoothing=0.1).to(DEVICE) + print("Menggunakan CrossEntropyLoss dengan Label Smoothing (0.1).") + + # --- TAHAP 1: FREEZE BACKBONE, LATIH HEAD --- + print("\n--- TAHAP 1: Melatih Classifier Head ---") + for param in model.parameters(): + param.requires_grad = False + for param in model.head.parameters(): + param.requires_grad = True + optimizer_head = optim.AdamW(model.head.parameters(), lr=LEARNING_RATE_HEAD, weight_decay=WEIGHT_DECAY) + for epoch in range(5): + print(f"Epoch Head {epoch+1}/5") + train_one_epoch(model, train_loader, optimizer_head, criterion, DEVICE) + validate_one_epoch_tta(model, valid_loader, criterion, DEVICE) + + # --- TAHAP 2: UNFREEZE & FINE-TUNE SELURUH MODEL --- + print("\n--- TAHAP 2: Fine-tuning Seluruh Model ---") + for param in model.parameters(): + param.requires_grad = True + optimizer_finetune = optim.AdamW(model.parameters(), lr=LEARNING_RATE_FINETUNE, weight_decay=WEIGHT_DECAY) + scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer_finetune, T_max=EPOCHS, eta_min=1e-7) + print("Menggunakan scheduler CosineAnnealingLR.") + + history = {'train_loss': [], 'train_acc': [], 'valid_loss': [], 'valid_acc': []} + best_valid_acc = 0.0 + best_labels, best_preds = None, None + best_model_wts = copy.deepcopy(model.state_dict()) + patience_counter = 0 + + for epoch in range(EPOCHS): + print(f"Epoch {epoch+1}/{EPOCHS}") + + train_loss, train_acc = train_one_epoch(model, train_loader, optimizer_finetune, criterion, DEVICE) + valid_loss, valid_acc, valid_labels, valid_preds = 
validate_one_epoch_tta(model, valid_loader, criterion, DEVICE) + + scheduler.step(valid_loss) + + print(f" Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}") + print(f" Valid Loss: {valid_loss:.4f}, Valid Acc: {valid_acc:.4f}") + + history['train_loss'].append(train_loss) + history['train_acc'].append(train_acc) + history['valid_loss'].append(valid_loss) + history['valid_acc'].append(valid_acc) + + # EarlyStopping check + if valid_acc > best_valid_acc: + print(f"Validasi akurasi meningkat dari {best_valid_acc:.4f} ke {valid_acc:.4f}. Menyimpan model...") + save_model(epoch, model, optimizer_finetune, criterion, f"{OUTPUT_DIR}/{MODEL_NAME}") + best_valid_acc = valid_acc + best_labels = valid_labels + best_preds = valid_preds + best_model_wts = copy.deepcopy(model.state_dict()) + patience_counter = 0 + else: + patience_counter += 1 + if patience_counter >= PATIENCE: + print(f"Early stopping di epoch {epoch+1} karena tidak ada peningkatan validasi selama {PATIENCE} epoch.") + break + + # Load best model + model.load_state_dict(best_model_wts) + save_plots(history['train_acc'], history['valid_acc'], history['train_loss'], history['valid_loss'], OUTPUT_DIR) + + if best_labels and best_preds: + save_confusion_matrix(best_labels, best_preds, classes, f"{OUTPUT_DIR}/confusion_matrix.png") + + print("\n--- Selesai ---") + print(f"Model terbaik disimpan di {OUTPUT_DIR}/{MODEL_NAME}") diff --git a/.history/src/train_20251002093300.py b/.history/src/train_20251002093300.py new file mode 100644 index 0000000000000000000000000000000000000000..3441a17fcb59c599164b8e89638bce3b23c7c157 --- /dev/null +++ b/.history/src/train_20251002093300.py @@ -0,0 +1,180 @@ +# src/train.py (dengan TTA + FocalLoss + EarlyStopping) + +import torch +import torch.nn as nn +import torch.optim as optim +from tqdm import tqdm +import numpy as np +from sklearn.metrics import classification_report +from torch.utils.data import DataLoader, WeightedRandomSampler +from torch.optim.lr_scheduler import 
ReduceLROnPlateau +import copy + +# Impor dari file lain dalam proyek +from model import create_model +from dataset import get_dataloaders +from utils import save_model, save_plots, save_confusion_matrix, FocalLoss + +# --- 1. KONFIGURASI & HYPERPARAMETERS --- +DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu' +DATA_DIR = 'data' +OUTPUT_DIR = 'outputs/new_model2' +IMAGE_SIZE = 224 +BATCH_SIZE = 16 +NUM_WORKERS = 4 +EPOCHS = 50 +LEARNING_RATE_HEAD = 1e-3 +LEARNING_RATE_FINETUNE = 2e-5 +WEIGHT_DECAY = 0.05 +MODEL_NAME = 'best_model_final_TTA_Focal.pth' +NUM_CLASSES = 4 +PATIENCE = 7 # untuk EarlyStopping + +# --- 2. TRAINING & VALIDASI --- +def train_one_epoch(model, dataloader, optimizer, criterion, device): + model.train() + running_loss, correct_predictions, total_samples = 0.0, 0, 0 + + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Training"): + images, labels = images.to(device), labels.to(device) + optimizer.zero_grad() + outputs = model(images) + loss = criterion(outputs, labels) + loss.backward() + optimizer.step() + + running_loss += loss.item() * images.size(0) + _, preds = torch.max(outputs, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + return epoch_loss, epoch_acc + + +def validate_one_epoch_tta(model, dataloader, criterion, device): + """ + Validasi dengan Test-Time Augmentation (TTA). + Gunakan prediksi rata-rata dari beberapa augmentasi sederhana. 
+ """ + model.eval() + running_loss, correct_predictions, total_samples = 0.0, 0, 0 + all_preds, all_labels = [], [] + + with torch.no_grad(): + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Validating with TTA"): + images, labels = images.to(device), labels.to(device) + + # TTA: original, flip H, flip V, rotate 90 + outputs_list = [] + outputs_list.append(model(images)) # original + outputs_list.append(model(torch.flip(images, dims=[3]))) # horizontal flip + outputs_list.append(model(torch.flip(images, dims=[2]))) # vertical flip + outputs_list.append(model(torch.rot90(images, k=1, dims=[2, 3]))) # rotate 90° + + # Rata-ratakan probabilitas + outputs_avg = torch.mean( + torch.stack([torch.softmax(out, dim=1) for out in outputs_list]), dim=0 + ) + + loss = criterion(outputs_avg, labels) + running_loss += loss.item() * images.size(0) + + _, preds = torch.max(outputs_avg, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + all_preds.extend(preds.cpu().numpy()) + all_labels.extend(labels.cpu().numpy()) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + + print("\n--- Laporan Klasifikasi Validasi (dengan TTA) ---") + print(classification_report(all_labels, all_preds, target_names=[str(i) for i in range(NUM_CLASSES)], zero_division=0)) + + return epoch_loss, epoch_acc, all_labels, all_preds + +# --- 3. SCRIPT UTAMA --- +if __name__ == '__main__': + _, valid_loader, classes, train_dataset = get_dataloaders(DATA_DIR, BATCH_SIZE, IMAGE_SIZE, NUM_WORKERS) + + print("\n--- Menyiapkan Balanced Sampler untuk Training ---") + class_counts = np.bincount(train_dataset.targets) + class_weights = 1. 
/ torch.tensor(class_counts, dtype=torch.float) + sample_weights = class_weights[train_dataset.targets] + sampler = WeightedRandomSampler(weights=sample_weights, num_samples=len(sample_weights), replacement=True) + train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, sampler=sampler, num_workers=NUM_WORKERS) + + model = create_model(num_classes=NUM_CLASSES, image_size=IMAGE_SIZE).to(DEVICE) + + criterion = nn.CrossEntropyLoss(label_smoothing=0.1).to(DEVICE) + print("Menggunakan CrossEntropyLoss dengan Label Smoothing (0.1).") + + # --- TAHAP 1: FREEZE BACKBONE, LATIH HEAD --- + print("\n--- TAHAP 1: Melatih Classifier Head ---") + for param in model.parameters(): + param.requires_grad = False + for param in model.head.parameters(): + param.requires_grad = True + optimizer_head = optim.AdamW(model.head.parameters(), lr=LEARNING_RATE_HEAD, weight_decay=WEIGHT_DECAY) + for epoch in range(5): + print(f"Epoch Head {epoch+1}/5") + train_one_epoch(model, train_loader, optimizer_head, criterion, DEVICE) + validate_one_epoch_tta(model, valid_loader, criterion, DEVICE) + + # --- TAHAP 2: UNFREEZE & FINE-TUNE SELURUH MODEL --- + print("\n--- TAHAP 2: Fine-tuning Seluruh Model ---") + for param in model.parameters(): + param.requires_grad = True + optimizer_finetune = optim.AdamW(model.parameters(), lr=LEARNING_RATE_FINETUNE, weight_decay=WEIGHT_DECAY) + scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer_finetune, T_max=EPOCHS, eta_min=1e-7) + print("Menggunakan scheduler CosineAnnealingLR.") + + history = {'train_loss': [], 'train_acc': [], 'valid_loss': [], 'valid_acc': []} + best_valid_acc = 0.0 + best_labels, best_preds = None, None + best_model_wts = copy.deepcopy(model.state_dict()) + patience_counter = 0 + + for epoch in range(EPOCHS): + print(f"Epoch {epoch+1}/{EPOCHS}") + + train_loss, train_acc = train_one_epoch(model, train_loader, optimizer_finetune, criterion, DEVICE) + valid_loss, valid_acc, valid_labels, valid_preds = 
validate_one_epoch_tta(model, valid_loader, criterion, DEVICE) + + scheduler.step(valid_loss) + + print(f" Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}") + print(f" Valid Loss: {valid_loss:.4f}, Valid Acc: {valid_acc:.4f}") + + history['train_loss'].append(train_loss) + history['train_acc'].append(train_acc) + history['valid_loss'].append(valid_loss) + history['valid_acc'].append(valid_acc) + + # EarlyStopping check + if valid_acc > best_valid_acc: + print(f"Validasi akurasi meningkat dari {best_valid_acc:.4f} ke {valid_acc:.4f}. Menyimpan model...") + save_model(epoch, model, optimizer_finetune, criterion, f"{OUTPUT_DIR}/{MODEL_NAME}") + best_valid_acc = valid_acc + best_labels = valid_labels + best_preds = valid_preds + best_model_wts = copy.deepcopy(model.state_dict()) + patience_counter = 0 + else: + patience_counter += 1 + if patience_counter >= PATIENCE: + print(f"Early stopping di epoch {epoch+1} karena tidak ada peningkatan validasi selama {PATIENCE} epoch.") + break + + # Load best model + model.load_state_dict(best_model_wts) + save_plots(history['train_acc'], history['valid_acc'], history['train_loss'], history['valid_loss'], OUTPUT_DIR) + + if best_labels and best_preds: + save_confusion_matrix(best_labels, best_preds, classes, f"{OUTPUT_DIR}/confusion_matrix.png") + + print("\n--- Selesai ---") + print(f"Model terbaik disimpan di {OUTPUT_DIR}/{MODEL_NAME}") diff --git a/.history/src/train_20251002093435.py b/.history/src/train_20251002093435.py new file mode 100644 index 0000000000000000000000000000000000000000..0a8eb721d1e176166f8cc9458c6258b4b2efb5b7 --- /dev/null +++ b/.history/src/train_20251002093435.py @@ -0,0 +1,180 @@ +# src/train.py (dengan TTA + FocalLoss + EarlyStopping) + +import torch +import torch.nn as nn +import torch.optim as optim +from tqdm import tqdm +import numpy as np +from sklearn.metrics import classification_report +from torch.utils.data import DataLoader, WeightedRandomSampler +from torch.optim.lr_scheduler import 
ReduceLROnPlateau +import copy + +# Impor dari file lain dalam proyek +from model import create_model +from dataset import get_dataloaders +from utils import save_model, save_plots, save_confusion_matrix, FocalLoss + +# --- 1. KONFIGURASI & HYPERPARAMETERS --- +DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu' +DATA_DIR = 'data' +OUTPUT_DIR = 'outputs/new_model2' +IMAGE_SIZE = 224 +BATCH_SIZE = 16 +NUM_WORKERS = 4 +EPOCHS = 50 +LEARNING_RATE_HEAD = 1e-3 +LEARNING_RATE_FINETUNE = 2e-5 +WEIGHT_DECAY = 0.05 +MODEL_NAME = 'best_model_final_TTA_Focal.pth' +NUM_CLASSES = 4 +PATIENCE = 7 # untuk EarlyStopping + +# --- 2. TRAINING & VALIDASI --- +def train_one_epoch(model, dataloader, optimizer, criterion, device): + model.train() + running_loss, correct_predictions, total_samples = 0.0, 0, 0 + + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Training"): + images, labels = images.to(device), labels.to(device) + optimizer.zero_grad() + outputs = model(images) + loss = criterion(outputs, labels) + loss.backward() + optimizer.step() + + running_loss += loss.item() * images.size(0) + _, preds = torch.max(outputs, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + return epoch_loss, epoch_acc + + +def validate_one_epoch_tta(model, dataloader, criterion, device): + """ + Validasi dengan Test-Time Augmentation (TTA). + Gunakan prediksi rata-rata dari beberapa augmentasi sederhana. 
+ """ + model.eval() + running_loss, correct_predictions, total_samples = 0.0, 0, 0 + all_preds, all_labels = [], [] + + with torch.no_grad(): + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Validating with TTA"): + images, labels = images.to(device), labels.to(device) + + # TTA: original, flip H, flip V, rotate 90 + outputs_list = [] + outputs_list.append(model(images)) # original + outputs_list.append(model(torch.flip(images, dims=[3]))) # horizontal flip + outputs_list.append(model(torch.flip(images, dims=[2]))) # vertical flip + outputs_list.append(model(torch.rot90(images, k=1, dims=[2, 3]))) # rotate 90° + + # Rata-ratakan probabilitas + outputs_avg = torch.mean( + torch.stack([torch.softmax(out, dim=1) for out in outputs_list]), dim=0 + ) + + loss = criterion(outputs_avg, labels) + running_loss += loss.item() * images.size(0) + + _, preds = torch.max(outputs_avg, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + all_preds.extend(preds.cpu().numpy()) + all_labels.extend(labels.cpu().numpy()) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + + print("\n--- Laporan Klasifikasi Validasi (dengan TTA) ---") + print(classification_report(all_labels, all_preds, target_names=[str(i) for i in range(NUM_CLASSES)], zero_division=0)) + + return epoch_loss, epoch_acc, all_labels, all_preds + +# --- 3. SCRIPT UTAMA --- +if __name__ == '__main__': + _, valid_loader, classes, train_dataset = get_dataloaders(DATA_DIR, BATCH_SIZE, IMAGE_SIZE, NUM_WORKERS) + + print("\n--- Menyiapkan Balanced Sampler untuk Training ---") + class_counts = np.bincount(train_dataset.targets) + class_weights = 1. 
/ torch.tensor(class_counts, dtype=torch.float) + sample_weights = class_weights[train_dataset.targets] + sampler = WeightedRandomSampler(weights=sample_weights, num_samples=len(sample_weights), replacement=True) + train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, sampler=sampler, num_workers=NUM_WORKERS) + + model = create_model(num_classes=NUM_CLASSES, image_size=IMAGE_SIZE).to(DEVICE) + + criterion = nn.CrossEntropyLoss(label_smoothing=0.2).to(DEVICE) + print("Menggunakan CrossEntropyLoss dengan Label Smoothing (0.1).") + + # --- TAHAP 1: FREEZE BACKBONE, LATIH HEAD --- + print("\n--- TAHAP 1: Melatih Classifier Head ---") + for param in model.parameters(): + param.requires_grad = False + for param in model.head.parameters(): + param.requires_grad = True + optimizer_head = optim.AdamW(model.head.parameters(), lr=LEARNING_RATE_HEAD, weight_decay=WEIGHT_DECAY) + for epoch in range(5): + print(f"Epoch Head {epoch+1}/5") + train_one_epoch(model, train_loader, optimizer_head, criterion, DEVICE) + validate_one_epoch_tta(model, valid_loader, criterion, DEVICE) + + # --- TAHAP 2: UNFREEZE & FINE-TUNE SELURUH MODEL --- + print("\n--- TAHAP 2: Fine-tuning Seluruh Model ---") + for param in model.parameters(): + param.requires_grad = True + optimizer_finetune = optim.AdamW(model.parameters(), lr=LEARNING_RATE_FINETUNE, weight_decay=WEIGHT_DECAY) + scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer_finetune, T_max=EPOCHS, eta_min=1e-7) + print("Menggunakan scheduler CosineAnnealingLR.") + + history = {'train_loss': [], 'train_acc': [], 'valid_loss': [], 'valid_acc': []} + best_valid_acc = 0.0 + best_labels, best_preds = None, None + best_model_wts = copy.deepcopy(model.state_dict()) + patience_counter = 0 + + for epoch in range(EPOCHS): + print(f"Epoch {epoch+1}/{EPOCHS}") + + train_loss, train_acc = train_one_epoch(model, train_loader, optimizer_finetune, criterion, DEVICE) + valid_loss, valid_acc, valid_labels, valid_preds = 
validate_one_epoch_tta(model, valid_loader, criterion, DEVICE) + + scheduler.step(valid_loss) + + print(f" Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}") + print(f" Valid Loss: {valid_loss:.4f}, Valid Acc: {valid_acc:.4f}") + + history['train_loss'].append(train_loss) + history['train_acc'].append(train_acc) + history['valid_loss'].append(valid_loss) + history['valid_acc'].append(valid_acc) + + # EarlyStopping check + if valid_acc > best_valid_acc: + print(f"Validasi akurasi meningkat dari {best_valid_acc:.4f} ke {valid_acc:.4f}. Menyimpan model...") + save_model(epoch, model, optimizer_finetune, criterion, f"{OUTPUT_DIR}/{MODEL_NAME}") + best_valid_acc = valid_acc + best_labels = valid_labels + best_preds = valid_preds + best_model_wts = copy.deepcopy(model.state_dict()) + patience_counter = 0 + else: + patience_counter += 1 + if patience_counter >= PATIENCE: + print(f"Early stopping di epoch {epoch+1} karena tidak ada peningkatan validasi selama {PATIENCE} epoch.") + break + + # Load best model + model.load_state_dict(best_model_wts) + save_plots(history['train_acc'], history['valid_acc'], history['train_loss'], history['valid_loss'], OUTPUT_DIR) + + if best_labels and best_preds: + save_confusion_matrix(best_labels, best_preds, classes, f"{OUTPUT_DIR}/confusion_matrix.png") + + print("\n--- Selesai ---") + print(f"Model terbaik disimpan di {OUTPUT_DIR}/{MODEL_NAME}") diff --git a/.history/src/train_20251002093437.py b/.history/src/train_20251002093437.py new file mode 100644 index 0000000000000000000000000000000000000000..3441a17fcb59c599164b8e89638bce3b23c7c157 --- /dev/null +++ b/.history/src/train_20251002093437.py @@ -0,0 +1,180 @@ +# src/train.py (dengan TTA + FocalLoss + EarlyStopping) + +import torch +import torch.nn as nn +import torch.optim as optim +from tqdm import tqdm +import numpy as np +from sklearn.metrics import classification_report +from torch.utils.data import DataLoader, WeightedRandomSampler +from torch.optim.lr_scheduler import 
ReduceLROnPlateau +import copy + +# Impor dari file lain dalam proyek +from model import create_model +from dataset import get_dataloaders +from utils import save_model, save_plots, save_confusion_matrix, FocalLoss + +# --- 1. KONFIGURASI & HYPERPARAMETERS --- +DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu' +DATA_DIR = 'data' +OUTPUT_DIR = 'outputs/new_model2' +IMAGE_SIZE = 224 +BATCH_SIZE = 16 +NUM_WORKERS = 4 +EPOCHS = 50 +LEARNING_RATE_HEAD = 1e-3 +LEARNING_RATE_FINETUNE = 2e-5 +WEIGHT_DECAY = 0.05 +MODEL_NAME = 'best_model_final_TTA_Focal.pth' +NUM_CLASSES = 4 +PATIENCE = 7 # untuk EarlyStopping + +# --- 2. TRAINING & VALIDASI --- +def train_one_epoch(model, dataloader, optimizer, criterion, device): + model.train() + running_loss, correct_predictions, total_samples = 0.0, 0, 0 + + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Training"): + images, labels = images.to(device), labels.to(device) + optimizer.zero_grad() + outputs = model(images) + loss = criterion(outputs, labels) + loss.backward() + optimizer.step() + + running_loss += loss.item() * images.size(0) + _, preds = torch.max(outputs, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + return epoch_loss, epoch_acc + + +def validate_one_epoch_tta(model, dataloader, criterion, device): + """ + Validasi dengan Test-Time Augmentation (TTA). + Gunakan prediksi rata-rata dari beberapa augmentasi sederhana. 
+ """ + model.eval() + running_loss, correct_predictions, total_samples = 0.0, 0, 0 + all_preds, all_labels = [], [] + + with torch.no_grad(): + for images, labels in tqdm(dataloader, total=len(dataloader), desc="Validating with TTA"): + images, labels = images.to(device), labels.to(device) + + # TTA: original, flip H, flip V, rotate 90 + outputs_list = [] + outputs_list.append(model(images)) # original + outputs_list.append(model(torch.flip(images, dims=[3]))) # horizontal flip + outputs_list.append(model(torch.flip(images, dims=[2]))) # vertical flip + outputs_list.append(model(torch.rot90(images, k=1, dims=[2, 3]))) # rotate 90° + + # Rata-ratakan probabilitas + outputs_avg = torch.mean( + torch.stack([torch.softmax(out, dim=1) for out in outputs_list]), dim=0 + ) + + loss = criterion(outputs_avg, labels) + running_loss += loss.item() * images.size(0) + + _, preds = torch.max(outputs_avg, 1) + correct_predictions += torch.sum(preds == labels.data) + total_samples += labels.size(0) + + all_preds.extend(preds.cpu().numpy()) + all_labels.extend(labels.cpu().numpy()) + + epoch_loss = running_loss / total_samples + epoch_acc = (correct_predictions.double() / total_samples).item() + + print("\n--- Laporan Klasifikasi Validasi (dengan TTA) ---") + print(classification_report(all_labels, all_preds, target_names=[str(i) for i in range(NUM_CLASSES)], zero_division=0)) + + return epoch_loss, epoch_acc, all_labels, all_preds + +# --- 3. SCRIPT UTAMA --- +if __name__ == '__main__': + _, valid_loader, classes, train_dataset = get_dataloaders(DATA_DIR, BATCH_SIZE, IMAGE_SIZE, NUM_WORKERS) + + print("\n--- Menyiapkan Balanced Sampler untuk Training ---") + class_counts = np.bincount(train_dataset.targets) + class_weights = 1. 
/ torch.tensor(class_counts, dtype=torch.float) + sample_weights = class_weights[train_dataset.targets] + sampler = WeightedRandomSampler(weights=sample_weights, num_samples=len(sample_weights), replacement=True) + train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, sampler=sampler, num_workers=NUM_WORKERS) + + model = create_model(num_classes=NUM_CLASSES, image_size=IMAGE_SIZE).to(DEVICE) + + criterion = nn.CrossEntropyLoss(label_smoothing=0.1).to(DEVICE) + print("Menggunakan CrossEntropyLoss dengan Label Smoothing (0.1).") + + # --- TAHAP 1: FREEZE BACKBONE, LATIH HEAD --- + print("\n--- TAHAP 1: Melatih Classifier Head ---") + for param in model.parameters(): + param.requires_grad = False + for param in model.head.parameters(): + param.requires_grad = True + optimizer_head = optim.AdamW(model.head.parameters(), lr=LEARNING_RATE_HEAD, weight_decay=WEIGHT_DECAY) + for epoch in range(5): + print(f"Epoch Head {epoch+1}/5") + train_one_epoch(model, train_loader, optimizer_head, criterion, DEVICE) + validate_one_epoch_tta(model, valid_loader, criterion, DEVICE) + + # --- TAHAP 2: UNFREEZE & FINE-TUNE SELURUH MODEL --- + print("\n--- TAHAP 2: Fine-tuning Seluruh Model ---") + for param in model.parameters(): + param.requires_grad = True + optimizer_finetune = optim.AdamW(model.parameters(), lr=LEARNING_RATE_FINETUNE, weight_decay=WEIGHT_DECAY) + scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer_finetune, T_max=EPOCHS, eta_min=1e-7) + print("Menggunakan scheduler CosineAnnealingLR.") + + history = {'train_loss': [], 'train_acc': [], 'valid_loss': [], 'valid_acc': []} + best_valid_acc = 0.0 + best_labels, best_preds = None, None + best_model_wts = copy.deepcopy(model.state_dict()) + patience_counter = 0 + + for epoch in range(EPOCHS): + print(f"Epoch {epoch+1}/{EPOCHS}") + + train_loss, train_acc = train_one_epoch(model, train_loader, optimizer_finetune, criterion, DEVICE) + valid_loss, valid_acc, valid_labels, valid_preds = 
validate_one_epoch_tta(model, valid_loader, criterion, DEVICE) + + scheduler.step(valid_loss) + + print(f" Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}") + print(f" Valid Loss: {valid_loss:.4f}, Valid Acc: {valid_acc:.4f}") + + history['train_loss'].append(train_loss) + history['train_acc'].append(train_acc) + history['valid_loss'].append(valid_loss) + history['valid_acc'].append(valid_acc) + + # EarlyStopping check + if valid_acc > best_valid_acc: + print(f"Validasi akurasi meningkat dari {best_valid_acc:.4f} ke {valid_acc:.4f}. Menyimpan model...") + save_model(epoch, model, optimizer_finetune, criterion, f"{OUTPUT_DIR}/{MODEL_NAME}") + best_valid_acc = valid_acc + best_labels = valid_labels + best_preds = valid_preds + best_model_wts = copy.deepcopy(model.state_dict()) + patience_counter = 0 + else: + patience_counter += 1 + if patience_counter >= PATIENCE: + print(f"Early stopping di epoch {epoch+1} karena tidak ada peningkatan validasi selama {PATIENCE} epoch.") + break + + # Load best model + model.load_state_dict(best_model_wts) + save_plots(history['train_acc'], history['valid_acc'], history['train_loss'], history['valid_loss'], OUTPUT_DIR) + + if best_labels and best_preds: + save_confusion_matrix(best_labels, best_preds, classes, f"{OUTPUT_DIR}/confusion_matrix.png") + + print("\n--- Selesai ---") + print(f"Model terbaik disimpan di {OUTPUT_DIR}/{MODEL_NAME}") diff --git a/.history/src/upload_model_20251004154752.py b/.history/src/upload_model_20251004154752.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/.history/src/upload_model_20251004154803.py b/.history/src/upload_model_20251004154803.py new file mode 100644 index 0000000000000000000000000000000000000000..c64c88d40dcee881f8ef87c589a6a49e1ea28fd1 --- /dev/null +++ b/.history/src/upload_model_20251004154803.py @@ -0,0 +1,17 @@ +from huggingface_hub import upload_file + +# === 1. 
Ganti path lokal model === +local_model_path = r"C:\Users\user\Documents\Project\Cataract-ViT\outputs\models\best_swin_model_final.pth" + +# === 2. Ganti repo_id sesuai nama repo kamu === +repo_id = "decoderr24/ViT-for-Limited-Medical-Data" + +# === 3. Upload file model === +upload_file( + path_or_fileobj=local_model_path, + path_in_repo="best_swin_model_final.pth", # nama file di repo Hugging Face + repo_id=repo_id, + repo_type="model" +) + +print("✅ Model berhasil diupload ke Hugging Face Hub!") diff --git a/.history/src/upload_model_20251004154855.py b/.history/src/upload_model_20251004154855.py new file mode 100644 index 0000000000000000000000000000000000000000..83b3aa2c969153c37f90d148525ed63f7d3d6bfe --- /dev/null +++ b/.history/src/upload_model_20251004154855.py @@ -0,0 +1,17 @@ +from huggingface_hub import upload_file + +# === 1. Ganti path lokal model === +local_model_path = r"C:\Users\user\Documents\Project\Cataract-ViT\outputs\models\best_swin_model_final.pth" + +# === 2. Ganti repo_id sesuai nama repo kamu === +repo_id = " https://huggingface.co/Decoder24/ViT-for-Limited-Medical-Data" + +# === 3. Upload file model === +upload_file( + path_or_fileobj=local_model_path, + path_in_repo="best_swin_model_final.pth", # nama file di repo Hugging Face + repo_id=repo_id, + repo_type="model" +) + +print("✅ Model berhasil diupload ke Hugging Face Hub!") diff --git a/.history/src/upload_model_20251004154857.py b/.history/src/upload_model_20251004154857.py new file mode 100644 index 0000000000000000000000000000000000000000..c64c88d40dcee881f8ef87c589a6a49e1ea28fd1 --- /dev/null +++ b/.history/src/upload_model_20251004154857.py @@ -0,0 +1,17 @@ +from huggingface_hub import upload_file + +# === 1. Ganti path lokal model === +local_model_path = r"C:\Users\user\Documents\Project\Cataract-ViT\outputs\models\best_swin_model_final.pth" + +# === 2. Ganti repo_id sesuai nama repo kamu === +repo_id = "decoderr24/ViT-for-Limited-Medical-Data" + +# === 3. 
Upload file model === +upload_file( + path_or_fileobj=local_model_path, + path_in_repo="best_swin_model_final.pth", # nama file di repo Hugging Face + repo_id=repo_id, + repo_type="model" +) + +print("✅ Model berhasil diupload ke Hugging Face Hub!") diff --git a/.history/src/upload_model_20251004154858.py b/.history/src/upload_model_20251004154858.py new file mode 100644 index 0000000000000000000000000000000000000000..83b3aa2c969153c37f90d148525ed63f7d3d6bfe --- /dev/null +++ b/.history/src/upload_model_20251004154858.py @@ -0,0 +1,17 @@ +from huggingface_hub import upload_file + +# === 1. Ganti path lokal model === +local_model_path = r"C:\Users\user\Documents\Project\Cataract-ViT\outputs\models\best_swin_model_final.pth" + +# === 2. Ganti repo_id sesuai nama repo kamu === +repo_id = " https://huggingface.co/Decoder24/ViT-for-Limited-Medical-Data" + +# === 3. Upload file model === +upload_file( + path_or_fileobj=local_model_path, + path_in_repo="best_swin_model_final.pth", # nama file di repo Hugging Face + repo_id=repo_id, + repo_type="model" +) + +print("✅ Model berhasil diupload ke Hugging Face Hub!") diff --git a/.history/src/upload_model_20251004154903.py b/.history/src/upload_model_20251004154903.py new file mode 100644 index 0000000000000000000000000000000000000000..92d08c81ff32d6211b02c650a8b9efca35159880 --- /dev/null +++ b/.history/src/upload_model_20251004154903.py @@ -0,0 +1,17 @@ +from huggingface_hub import upload_file + +# === 1. Ganti path lokal model === +local_model_path = r"C:\Users\user\Documents\Project\Cataract-ViT\outputs\models\best_swin_model_final.pth" + +# === 2. Ganti repo_id sesuai nama repo kamu === +repo_id = "Decoder24/ViT-for-Limited-Medical-Data" + +# === 3. 
Upload file model === +upload_file( + path_or_fileobj=local_model_path, + path_in_repo="best_swin_model_final.pth", # nama file di repo Hugging Face + repo_id=repo_id, + repo_type="model" +) + +print("✅ Model berhasil diupload ke Hugging Face Hub!") diff --git a/.history/src/upload_model_20251004160951.py b/.history/src/upload_model_20251004160951.py new file mode 100644 index 0000000000000000000000000000000000000000..3e1dc4aaae91e850b5851b390f92443f9103d1e2 --- /dev/null +++ b/.history/src/upload_model_20251004160951.py @@ -0,0 +1,17 @@ +from huggingface_hub import HfApi + +api = HfApi() + +# ganti dengan repo kamu di Hugging Face +repo_id = "Decoder24/ViT-for-Limited-Medical-Data" +model_path = r"C:\Users\user\Documents\Project\Cataract-ViT\outputs\models\best_swin_weights_only.pth" + +# Upload ke repo +api.upload_file( + path_or_fileobj=model_path, + path_in_repo="best_swin_weights_only.pth", + repo_id=repo_id, + repo_type="model" +) + +print("✅ File berhasil diupload ke Hugging Face Hub!") diff --git a/.history/src/utils_20250923122302.py b/.history/src/utils_20250923122302.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/.history/src/utils_20250923144453.py b/.history/src/utils_20250923144453.py new file mode 100644 index 0000000000000000000000000000000000000000..3bf151dcbd4b97503465ca47c632fd810e639812 --- /dev/null +++ b/.history/src/utils_20250923144453.py @@ -0,0 +1,48 @@ +# src/utils.py + +import torch +import matplotlib.pyplot as plt +import os + +# Atur style plot agar terlihat lebih bagus +plt.style.use('ggplot') + +def save_model(epochs, model, optimizer, criterion, model_path): + """ + Fungsi untuk menyimpan checkpoint model. 
+ """ + print(f"Menyimpan model ke {model_path}") + # Buat direktori jika belum ada + os.makedirs(os.path.dirname(model_path), exist_ok=True) + torch.save({ + 'epoch': epochs, + 'model_state_dict': model.state_dict(), + 'optimizer_state_dict': optimizer.state_dict(), + 'loss': criterion, + }, model_path) + +def save_plots(train_acc, valid_acc, train_loss, valid_loss, plot_path): + """ + Fungsi untuk menyimpan plot akurasi dan loss selama training. + """ + print(f"Menyimpan plot ke {plot_path}") + # Buat direktori jika belum ada + os.makedirs(os.path.dirname(plot_path), exist_ok=True) + + # Plot Akurasi + plt.figure(figsize=(10, 7)) + plt.plot(train_acc, color='green', linestyle='-', label='train accuracy') + plt.plot(valid_acc, color='blue', linestyle='-', label='validation accuracy') + plt.xlabel('Epochs') + plt.ylabel('Accuracy') + plt.legend() + plt.savefig(f"{plot_path}/accuracy.png") + + # Plot Loss + plt.figure(figsize=(10, 7)) + plt.plot(train_loss, color='orange', linestyle='-', label='train loss') + plt.plot(valid_loss, color='red', linestyle='-', label='validation loss') + plt.xlabel('Epochs') + plt.ylabel('Loss') + plt.legend() + plt.savefig(f"{plot_path}/loss.png") \ No newline at end of file diff --git a/.history/src/utils_20250923144516.py b/.history/src/utils_20250923144516.py new file mode 100644 index 0000000000000000000000000000000000000000..3bf151dcbd4b97503465ca47c632fd810e639812 --- /dev/null +++ b/.history/src/utils_20250923144516.py @@ -0,0 +1,48 @@ +# src/utils.py + +import torch +import matplotlib.pyplot as plt +import os + +# Atur style plot agar terlihat lebih bagus +plt.style.use('ggplot') + +def save_model(epochs, model, optimizer, criterion, model_path): + """ + Fungsi untuk menyimpan checkpoint model. 
+ """ + print(f"Menyimpan model ke {model_path}") + # Buat direktori jika belum ada + os.makedirs(os.path.dirname(model_path), exist_ok=True) + torch.save({ + 'epoch': epochs, + 'model_state_dict': model.state_dict(), + 'optimizer_state_dict': optimizer.state_dict(), + 'loss': criterion, + }, model_path) + +def save_plots(train_acc, valid_acc, train_loss, valid_loss, plot_path): + """ + Fungsi untuk menyimpan plot akurasi dan loss selama training. + """ + print(f"Menyimpan plot ke {plot_path}") + # Buat direktori jika belum ada + os.makedirs(os.path.dirname(plot_path), exist_ok=True) + + # Plot Akurasi + plt.figure(figsize=(10, 7)) + plt.plot(train_acc, color='green', linestyle='-', label='train accuracy') + plt.plot(valid_acc, color='blue', linestyle='-', label='validation accuracy') + plt.xlabel('Epochs') + plt.ylabel('Accuracy') + plt.legend() + plt.savefig(f"{plot_path}/accuracy.png") + + # Plot Loss + plt.figure(figsize=(10, 7)) + plt.plot(train_loss, color='orange', linestyle='-', label='train loss') + plt.plot(valid_loss, color='red', linestyle='-', label='validation loss') + plt.xlabel('Epochs') + plt.ylabel('Loss') + plt.legend() + plt.savefig(f"{plot_path}/loss.png") \ No newline at end of file diff --git a/.history/src/utils_20250926074047.py b/.history/src/utils_20250926074047.py new file mode 100644 index 0000000000000000000000000000000000000000..3c7bb926826f0816e6684889a47f459ddfbf4138 --- /dev/null +++ b/.history/src/utils_20250926074047.py @@ -0,0 +1,68 @@ +# src/utils.py + +import torch +import matplotlib.pyplot as plt +import os +import seaborn as sns +from sklearn.metrics import confusion_matrix + +# Atur style plot agar terlihat lebih bagus +plt.style.use('ggplot') + +def save_model(epochs, model, optimizer, criterion, model_path): + """ + Fungsi untuk menyimpan checkpoint model. 
+ """ + print(f"Menyimpan model ke {model_path}") + os.makedirs(os.path.dirname(model_path), exist_ok=True) + torch.save({ + 'epoch': epochs, + 'model_state_dict': model.state_dict(), + 'optimizer_state_dict': optimizer.state_dict(), + 'loss': criterion, + }, model_path) + +def save_plots(train_acc, valid_acc, train_loss, valid_loss, plot_path): + """ + Fungsi untuk menyimpan plot akurasi dan loss selama training. + """ + print(f"Menyimpan plot ke {plot_path}") + os.makedirs(os.path.dirname(plot_path), exist_ok=True) + + # Plot Akurasi + plt.figure(figsize=(10, 7)) + plt.plot(train_acc, color='green', linestyle='-', label='train accuracy') + plt.plot(valid_acc, color='blue', linestyle='-', label='validation accuracy') + plt.xlabel('Epochs') + plt.ylabel('Accuracy') + plt.legend() + plt.savefig(f"{plot_path}/accuracy.png") + + # Plot Loss + plt.figure(figsize=(10, 7)) + plt.plot(train_loss, color='orange', linestyle='-', label='train loss') + plt.plot(valid_loss, color='red', linestyle='-', label='validation loss') + plt.xlabel('Epochs') + plt.ylabel('Loss') + plt.legend() + plt.savefig(f"{plot_path}/loss.png") + +def save_confusion_matrix(y_true, y_pred, class_names, save_path): + """ + Menyimpan plot confusion matrix untuk analisis kesalahan model. 
+ """ + cm = confusion_matrix(y_true, y_pred) + plt.figure(figsize=(10, 8)) + sns.heatmap( + cm, + annot=True, + fmt='d', + cmap='Blues', + xticklabels=class_names, + yticklabels=class_names + ) + plt.xlabel('Predicted Label') + plt.ylabel('True Label') + plt.title('Confusion Matrix of Best Validation Model') + plt.savefig(save_path) + print(f"Confusion matrix disimpan di {save_path}") \ No newline at end of file diff --git a/.history/src/utils_20250927100303.py b/.history/src/utils_20250927100303.py new file mode 100644 index 0000000000000000000000000000000000000000..2f90c96910330df585db72ec5a5cc976b5e79056 --- /dev/null +++ b/.history/src/utils_20250927100303.py @@ -0,0 +1,69 @@ +# src/utils.py + +import torch +import matplotlib.pyplot as plt +import os +import seaborn as sns +from sklearn.metrics import confusion_matrix +import torch.nn.functional as F + +# Atur style plot agar terlihat lebih bagus +plt.style.use('ggplot') + +def save_model(epochs, model, optimizer, criterion, model_path): + """ + Fungsi untuk menyimpan checkpoint model. + """ + print(f"Menyimpan model ke {model_path}") + os.makedirs(os.path.dirname(model_path), exist_ok=True) + torch.save({ + 'epoch': epochs, + 'model_state_dict': model.state_dict(), + 'optimizer_state_dict': optimizer.state_dict(), + 'loss': criterion, + }, model_path) + +def save_plots(train_acc, valid_acc, train_loss, valid_loss, plot_path): + """ + Fungsi untuk menyimpan plot akurasi dan loss selama training. 
+ """ + print(f"Menyimpan plot ke {plot_path}") + os.makedirs(os.path.dirname(plot_path), exist_ok=True) + + # Plot Akurasi + plt.figure(figsize=(10, 7)) + plt.plot(train_acc, color='green', linestyle='-', label='train accuracy') + plt.plot(valid_acc, color='blue', linestyle='-', label='validation accuracy') + plt.xlabel('Epochs') + plt.ylabel('Accuracy') + plt.legend() + plt.savefig(f"{plot_path}/accuracy.png") + + # Plot Loss + plt.figure(figsize=(10, 7)) + plt.plot(train_loss, color='orange', linestyle='-', label='train loss') + plt.plot(valid_loss, color='red', linestyle='-', label='validation loss') + plt.xlabel('Epochs') + plt.ylabel('Loss') + plt.legend() + plt.savefig(f"{plot_path}/loss.png") + +def save_confusion_matrix(y_true, y_pred, class_names, save_path): + """ + Menyimpan plot confusion matrix untuk analisis kesalahan model. + """ + cm = confusion_matrix(y_true, y_pred) + plt.figure(figsize=(10, 8)) + sns.heatmap( + cm, + annot=True, + fmt='d', + cmap='Blues', + xticklabels=class_names, + yticklabels=class_names + ) + plt.xlabel('Predicted Label') + plt.ylabel('True Label') + plt.title('Confusion Matrix of Best Validation Model') + plt.savefig(save_path) + print(f"Confusion matrix disimpan di {save_path}") \ No newline at end of file diff --git a/.history/src/utils_20250927100318.py b/.history/src/utils_20250927100318.py new file mode 100644 index 0000000000000000000000000000000000000000..7f9095a5f643fddb0e14bbc711473fb3a98b29da --- /dev/null +++ b/.history/src/utils_20250927100318.py @@ -0,0 +1,85 @@ +# src/utils.py + +import torch +import matplotlib.pyplot as plt +import os +import seaborn as sns +from sklearn.metrics import confusion_matrix +import torch.nn.functional as F + +# Atur style plot agar terlihat lebih bagus +plt.style.use('ggplot') +class FocalLoss(nn.Module): + def __init__(self, alpha=1, gamma=2, reduction='mean'): + super(FocalLoss, self).__init__() + self.alpha = alpha + self.gamma = gamma + self.reduction = reduction + + def 
forward(self, inputs, targets): + ce_loss = F.cross_entropy(inputs, targets, reduction='none') + pt = torch.exp(-ce_loss) + focal_loss = self.alpha * (1-pt)**self.gamma * ce_loss + if self.reduction == 'mean': + return focal_loss.mean() + elif self.reduction == 'sum': + return focal_loss.sum() + else: + return focal_loss +def save_model(epochs, model, optimizer, criterion, model_path): + """ + Fungsi untuk menyimpan checkpoint model. + """ + print(f"Menyimpan model ke {model_path}") + os.makedirs(os.path.dirname(model_path), exist_ok=True) + torch.save({ + 'epoch': epochs, + 'model_state_dict': model.state_dict(), + 'optimizer_state_dict': optimizer.state_dict(), + 'loss': criterion, + }, model_path) + +def save_plots(train_acc, valid_acc, train_loss, valid_loss, plot_path): + """ + Fungsi untuk menyimpan plot akurasi dan loss selama training. + """ + print(f"Menyimpan plot ke {plot_path}") + os.makedirs(os.path.dirname(plot_path), exist_ok=True) + + # Plot Akurasi + plt.figure(figsize=(10, 7)) + plt.plot(train_acc, color='green', linestyle='-', label='train accuracy') + plt.plot(valid_acc, color='blue', linestyle='-', label='validation accuracy') + plt.xlabel('Epochs') + plt.ylabel('Accuracy') + plt.legend() + plt.savefig(f"{plot_path}/accuracy.png") + + # Plot Loss + plt.figure(figsize=(10, 7)) + plt.plot(train_loss, color='orange', linestyle='-', label='train loss') + plt.plot(valid_loss, color='red', linestyle='-', label='validation loss') + plt.xlabel('Epochs') + plt.ylabel('Loss') + plt.legend() + plt.savefig(f"{plot_path}/loss.png") + +def save_confusion_matrix(y_true, y_pred, class_names, save_path): + """ + Menyimpan plot confusion matrix untuk analisis kesalahan model. 
+ """ + cm = confusion_matrix(y_true, y_pred) + plt.figure(figsize=(10, 8)) + sns.heatmap( + cm, + annot=True, + fmt='d', + cmap='Blues', + xticklabels=class_names, + yticklabels=class_names + ) + plt.xlabel('Predicted Label') + plt.ylabel('True Label') + plt.title('Confusion Matrix of Best Validation Model') + plt.savefig(save_path) + print(f"Confusion matrix disimpan di {save_path}") \ No newline at end of file diff --git a/.history/src/utils_20250927100319.py b/.history/src/utils_20250927100319.py new file mode 100644 index 0000000000000000000000000000000000000000..a2c8723aec5f85b94893a219fa839624995ced9c --- /dev/null +++ b/.history/src/utils_20250927100319.py @@ -0,0 +1,86 @@ +# src/utils.py + +import torch +import matplotlib.pyplot as plt +import os +import seaborn as sns +from sklearn.metrics import confusion_matrix +import torch.nn.functional as F + +# Atur style plot agar terlihat lebih bagus +plt.style.use('ggplot') +class FocalLoss(nn.Module): + def __init__(self, alpha=1, gamma=2, reduction='mean'): + super(FocalLoss, self).__init__() + self.alpha = alpha + self.gamma = gamma + self.reduction = reduction + + def forward(self, inputs, targets): + ce_loss = F.cross_entropy(inputs, targets, reduction='none') + pt = torch.exp(-ce_loss) + focal_loss = self.alpha * (1-pt)**self.gamma * ce_loss + if self.reduction == 'mean': + return focal_loss.mean() + elif self.reduction == 'sum': + return focal_loss.sum() + else: + return focal_loss + +def save_model(epochs, model, optimizer, criterion, model_path): + """ + Fungsi untuk menyimpan checkpoint model. + """ + print(f"Menyimpan model ke {model_path}") + os.makedirs(os.path.dirname(model_path), exist_ok=True) + torch.save({ + 'epoch': epochs, + 'model_state_dict': model.state_dict(), + 'optimizer_state_dict': optimizer.state_dict(), + 'loss': criterion, + }, model_path) + +def save_plots(train_acc, valid_acc, train_loss, valid_loss, plot_path): + """ + Fungsi untuk menyimpan plot akurasi dan loss selama training. 
+ """ + print(f"Menyimpan plot ke {plot_path}") + os.makedirs(os.path.dirname(plot_path), exist_ok=True) + + # Plot Akurasi + plt.figure(figsize=(10, 7)) + plt.plot(train_acc, color='green', linestyle='-', label='train accuracy') + plt.plot(valid_acc, color='blue', linestyle='-', label='validation accuracy') + plt.xlabel('Epochs') + plt.ylabel('Accuracy') + plt.legend() + plt.savefig(f"{plot_path}/accuracy.png") + + # Plot Loss + plt.figure(figsize=(10, 7)) + plt.plot(train_loss, color='orange', linestyle='-', label='train loss') + plt.plot(valid_loss, color='red', linestyle='-', label='validation loss') + plt.xlabel('Epochs') + plt.ylabel('Loss') + plt.legend() + plt.savefig(f"{plot_path}/loss.png") + +def save_confusion_matrix(y_true, y_pred, class_names, save_path): + """ + Menyimpan plot confusion matrix untuk analisis kesalahan model. + """ + cm = confusion_matrix(y_true, y_pred) + plt.figure(figsize=(10, 8)) + sns.heatmap( + cm, + annot=True, + fmt='d', + cmap='Blues', + xticklabels=class_names, + yticklabels=class_names + ) + plt.xlabel('Predicted Label') + plt.ylabel('True Label') + plt.title('Confusion Matrix of Best Validation Model') + plt.savefig(save_path) + print(f"Confusion matrix disimpan di {save_path}") \ No newline at end of file diff --git a/.history/src/utils_20250927100332.py b/.history/src/utils_20250927100332.py new file mode 100644 index 0000000000000000000000000000000000000000..2f90c96910330df585db72ec5a5cc976b5e79056 --- /dev/null +++ b/.history/src/utils_20250927100332.py @@ -0,0 +1,69 @@ +# src/utils.py + +import torch +import matplotlib.pyplot as plt +import os +import seaborn as sns +from sklearn.metrics import confusion_matrix +import torch.nn.functional as F + +# Atur style plot agar terlihat lebih bagus +plt.style.use('ggplot') + +def save_model(epochs, model, optimizer, criterion, model_path): + """ + Fungsi untuk menyimpan checkpoint model. 
+ """ + print(f"Menyimpan model ke {model_path}") + os.makedirs(os.path.dirname(model_path), exist_ok=True) + torch.save({ + 'epoch': epochs, + 'model_state_dict': model.state_dict(), + 'optimizer_state_dict': optimizer.state_dict(), + 'loss': criterion, + }, model_path) + +def save_plots(train_acc, valid_acc, train_loss, valid_loss, plot_path): + """ + Fungsi untuk menyimpan plot akurasi dan loss selama training. + """ + print(f"Menyimpan plot ke {plot_path}") + os.makedirs(os.path.dirname(plot_path), exist_ok=True) + + # Plot Akurasi + plt.figure(figsize=(10, 7)) + plt.plot(train_acc, color='green', linestyle='-', label='train accuracy') + plt.plot(valid_acc, color='blue', linestyle='-', label='validation accuracy') + plt.xlabel('Epochs') + plt.ylabel('Accuracy') + plt.legend() + plt.savefig(f"{plot_path}/accuracy.png") + + # Plot Loss + plt.figure(figsize=(10, 7)) + plt.plot(train_loss, color='orange', linestyle='-', label='train loss') + plt.plot(valid_loss, color='red', linestyle='-', label='validation loss') + plt.xlabel('Epochs') + plt.ylabel('Loss') + plt.legend() + plt.savefig(f"{plot_path}/loss.png") + +def save_confusion_matrix(y_true, y_pred, class_names, save_path): + """ + Menyimpan plot confusion matrix untuk analisis kesalahan model. 
+ """ + cm = confusion_matrix(y_true, y_pred) + plt.figure(figsize=(10, 8)) + sns.heatmap( + cm, + annot=True, + fmt='d', + cmap='Blues', + xticklabels=class_names, + yticklabels=class_names + ) + plt.xlabel('Predicted Label') + plt.ylabel('True Label') + plt.title('Confusion Matrix of Best Validation Model') + plt.savefig(save_path) + print(f"Confusion matrix disimpan di {save_path}") \ No newline at end of file diff --git a/.history/src/utils_20250927100348.py b/.history/src/utils_20250927100348.py new file mode 100644 index 0000000000000000000000000000000000000000..be1ddc4b216f0e676f2a5c5e651ac55461589f9b --- /dev/null +++ b/.history/src/utils_20250927100348.py @@ -0,0 +1,87 @@ +# src/utils.py + +import torch +import matplotlib.pyplot as plt +import os +import seaborn as sns +from sklearn.metrics import confusion_matrix +import torch.nn.functional as F + +# Atur style plot agar terlihat lebih bagus +plt.style.use('ggplot') + +def save_model(epochs, model, optimizer, criterion, model_path): + """ + Fungsi untuk menyimpan checkpoint model. + """ + print(f"Menyimpan model ke {model_path}") + os.makedirs(os.path.dirname(model_path), exist_ok=True) + torch.save({ + 'epoch': epochs, + 'model_state_dict': model.state_dict(), + 'optimizer_state_dict': optimizer.state_dict(), + 'loss': criterion, + }, model_path) + +def save_plots(train_acc, valid_acc, train_loss, valid_loss, plot_path): + """ + Fungsi untuk menyimpan plot akurasi dan loss selama training. 
+ """ + print(f"Menyimpan plot ke {plot_path}") + os.makedirs(os.path.dirname(plot_path), exist_ok=True) + + # Plot Akurasi + plt.figure(figsize=(10, 7)) + plt.plot(train_acc, color='green', linestyle='-', label='train accuracy') + plt.plot(valid_acc, color='blue', linestyle='-', label='validation accuracy') + plt.xlabel('Epochs') + plt.ylabel('Accuracy') + plt.legend() + plt.savefig(f"{plot_path}/accuracy.png") + + # Plot Loss + plt.figure(figsize=(10, 7)) + plt.plot(train_loss, color='orange', linestyle='-', label='train loss') + plt.plot(valid_loss, color='red', linestyle='-', label='validation loss') + plt.xlabel('Epochs') + plt.ylabel('Loss') + plt.legend() + plt.savefig(f"{plot_path}/loss.png") + +def save_confusion_matrix(y_true, y_pred, class_names, save_path): + """ + Menyimpan plot confusion matrix untuk analisis kesalahan model. + """ + cm = confusion_matrix(y_true, y_pred) + plt.figure(figsize=(10, 8)) + sns.heatmap( + cm, + annot=True, + fmt='d', + cmap='Blues', + xticklabels=class_names, + yticklabels=class_names + ) + plt.xlabel('Predicted Label') + plt.ylabel('True Label') + plt.title('Confusion Matrix of Best Validation Model') + plt.savefig(save_path) + print(f"Confusion matrix disimpan di {save_path}") + + class FocalLoss(nn.Module): + def __init__(self, alpha=1, gamma=2, reduction='mean'): + super(FocalLoss, self).__init__() + self.alpha = alpha + self.gamma = gamma + self.reduction = reduction + + def forward(self, inputs, targets): + ce_loss = F.cross_entropy(inputs, targets, reduction='none') + pt = torch.exp(-ce_loss) + focal_loss = self.alpha * (1-pt)**self.gamma * ce_loss + if self.reduction == 'mean': + return focal_loss.mean() + elif self.reduction == 'sum': + return focal_loss.sum() + else: + return focal_loss \ No newline at end of file diff --git a/.history/src/utils_20250927100352.py b/.history/src/utils_20250927100352.py new file mode 100644 index 0000000000000000000000000000000000000000..a1b7351e0e0e6c90c4e5b9503d497f8507ac8712 --- 
/dev/null +++ b/.history/src/utils_20250927100352.py @@ -0,0 +1,71 @@ +# src/utils.py + +import torch +import matplotlib.pyplot as plt +import os +import seaborn as sns +from sklearn.metrics import confusion_matrix +import torch.nn.functional as F + +# Atur style plot agar terlihat lebih bagus +plt.style.use('ggplot') + +def save_model(epochs, model, optimizer, criterion, model_path): + """ + Fungsi untuk menyimpan checkpoint model. + """ + print(f"Menyimpan model ke {model_path}") + os.makedirs(os.path.dirname(model_path), exist_ok=True) + torch.save({ + 'epoch': epochs, + 'model_state_dict': model.state_dict(), + 'optimizer_state_dict': optimizer.state_dict(), + 'loss': criterion, + }, model_path) + +def save_plots(train_acc, valid_acc, train_loss, valid_loss, plot_path): + """ + Fungsi untuk menyimpan plot akurasi dan loss selama training. + """ + print(f"Menyimpan plot ke {plot_path}") + os.makedirs(os.path.dirname(plot_path), exist_ok=True) + + # Plot Akurasi + plt.figure(figsize=(10, 7)) + plt.plot(train_acc, color='green', linestyle='-', label='train accuracy') + plt.plot(valid_acc, color='blue', linestyle='-', label='validation accuracy') + plt.xlabel('Epochs') + plt.ylabel('Accuracy') + plt.legend() + plt.savefig(f"{plot_path}/accuracy.png") + + # Plot Loss + plt.figure(figsize=(10, 7)) + plt.plot(train_loss, color='orange', linestyle='-', label='train loss') + plt.plot(valid_loss, color='red', linestyle='-', label='validation loss') + plt.xlabel('Epochs') + plt.ylabel('Loss') + plt.legend() + plt.savefig(f"{plot_path}/loss.png") + +def save_confusion_matrix(y_true, y_pred, class_names, save_path): + """ + Menyimpan plot confusion matrix untuk analisis kesalahan model. 
+ """ + cm = confusion_matrix(y_true, y_pred) + plt.figure(figsize=(10, 8)) + sns.heatmap( + cm, + annot=True, + fmt='d', + cmap='Blues', + xticklabels=class_names, + yticklabels=class_names + ) + plt.xlabel('Predicted Label') + plt.ylabel('True Label') + plt.title('Confusion Matrix of Best Validation Model') + plt.savefig(save_path) + print(f"Confusion matrix disimpan di {save_path}") + + \ No newline at end of file diff --git a/.history/src/utils_20250927100357.py b/.history/src/utils_20250927100357.py new file mode 100644 index 0000000000000000000000000000000000000000..5e4959032997dbedd7baa28df42b68d88c5f61d1 --- /dev/null +++ b/.history/src/utils_20250927100357.py @@ -0,0 +1,88 @@ +# src/utils.py + +import torch +import matplotlib.pyplot as plt +import os +import seaborn as sns +from sklearn.metrics import confusion_matrix +import torch.nn.functional as F + +# Atur style plot agar terlihat lebih bagus +plt.style.use('ggplot') +class FocalLoss(nn.Module): + def __init__(self, alpha=1, gamma=2, reduction='mean'): + super(FocalLoss, self).__init__() + self.alpha = alpha + self.gamma = gamma + self.reduction = reduction + + def forward(self, inputs, targets): + ce_loss = F.cross_entropy(inputs, targets, reduction='none') + pt = torch.exp(-ce_loss) + focal_loss = self.alpha * (1-pt)**self.gamma * ce_loss + if self.reduction == 'mean': + return focal_loss.mean() + elif self.reduction == 'sum': + return focal_loss.sum() + else: + return focal_loss + +def save_model(epochs, model, optimizer, criterion, model_path): + """ + Fungsi untuk menyimpan checkpoint model. 
+ """ + print(f"Menyimpan model ke {model_path}") + os.makedirs(os.path.dirname(model_path), exist_ok=True) + torch.save({ + 'epoch': epochs, + 'model_state_dict': model.state_dict(), + 'optimizer_state_dict': optimizer.state_dict(), + 'loss': criterion, + }, model_path) + +def save_plots(train_acc, valid_acc, train_loss, valid_loss, plot_path): + """ + Fungsi untuk menyimpan plot akurasi dan loss selama training. + """ + print(f"Menyimpan plot ke {plot_path}") + os.makedirs(os.path.dirname(plot_path), exist_ok=True) + + # Plot Akurasi + plt.figure(figsize=(10, 7)) + plt.plot(train_acc, color='green', linestyle='-', label='train accuracy') + plt.plot(valid_acc, color='blue', linestyle='-', label='validation accuracy') + plt.xlabel('Epochs') + plt.ylabel('Accuracy') + plt.legend() + plt.savefig(f"{plot_path}/accuracy.png") + + # Plot Loss + plt.figure(figsize=(10, 7)) + plt.plot(train_loss, color='orange', linestyle='-', label='train loss') + plt.plot(valid_loss, color='red', linestyle='-', label='validation loss') + plt.xlabel('Epochs') + plt.ylabel('Loss') + plt.legend() + plt.savefig(f"{plot_path}/loss.png") + +def save_confusion_matrix(y_true, y_pred, class_names, save_path): + """ + Menyimpan plot confusion matrix untuk analisis kesalahan model. 
+ """ + cm = confusion_matrix(y_true, y_pred) + plt.figure(figsize=(10, 8)) + sns.heatmap( + cm, + annot=True, + fmt='d', + cmap='Blues', + xticklabels=class_names, + yticklabels=class_names + ) + plt.xlabel('Predicted Label') + plt.ylabel('True Label') + plt.title('Confusion Matrix of Best Validation Model') + plt.savefig(save_path) + print(f"Confusion matrix disimpan di {save_path}") + + \ No newline at end of file diff --git a/.history/src/utils_20250927100449.py b/.history/src/utils_20250927100449.py new file mode 100644 index 0000000000000000000000000000000000000000..171938ca8a1bc81d7ed5e46f4cf77a57ddf807aa --- /dev/null +++ b/.history/src/utils_20250927100449.py @@ -0,0 +1,84 @@ +# src/utils.py + +import torch +import torch.nn as nn +import torch.nn.functional as F +import matplotlib.pyplot as plt +import os +import seaborn as sns +from sklearn.metrics import confusion_matrix + +# Atur style plot agar terlihat lebih bagus +plt.style.use('ggplot') + +def save_model(epochs, model, optimizer, criterion, model_path): + """ + Fungsi untuk menyimpan checkpoint model. + """ + print(f"Menyimpan model ke {model_path}") + os.makedirs(os.path.dirname(model_path), exist_ok=True) + torch.save({ + 'epoch': epochs, + 'model_state_dict': model.state_dict(), + 'optimizer_state_dict': optimizer.state_dict(), + 'loss': criterion, + }, model_path) + +def save_plots(train_acc, valid_acc, train_loss, valid_loss, plot_path): + """ + Fungsi untuk menyimpan plot akurasi dan loss selama training. 
+ """ + print(f"Menyimpan plot ke {plot_path}") + os.makedirs(os.path.dirname(plot_path), exist_ok=True) + + # Plot Akurasi + plt.figure(figsize=(10, 7)) + plt.plot(train_acc, color='green', linestyle='-', label='train accuracy') + plt.plot(valid_acc, color='blue', linestyle='-', label='validation accuracy') + plt.xlabel('Epochs') + plt.ylabel('Accuracy') + plt.legend() + plt.savefig(f"{plot_path}/accuracy.png") + + # Plot Loss + plt.figure(figsize=(10, 7)) + plt.plot(train_loss, color='orange', linestyle='-', label='train loss') + plt.plot(valid_loss, color='red', linestyle='-', label='validation loss') + plt.xlabel('Epochs') + plt.ylabel('Loss') + plt.legend() + plt.savefig(f"{plot_path}/loss.png") + +def save_confusion_matrix(y_true, y_pred, class_names, save_path): + """ + Menyimpan plot confusion matrix untuk analisis kesalahan model. + """ + cm = confusion_matrix(y_true, y_pred) + plt.figure(figsize=(10, 8)) + sns.heatmap( + cm, + annot=True, + fmt='d', + cmap='Blues', + xticklabels=class_names, + yticklabels=class_names + ) + plt.xlabel('Predicted Label') + plt.ylabel('True Label') + plt.title('Confusion Matrix of Best Validation Model') + plt.savefig(save_path) + print(f"Confusion matrix disimpan di {save_path}") + +class FocalLoss(nn.Module): + """ + Implementasi Focal Loss untuk menangani class imbalance + dan fokus pada sampel yang sulit. 
+ """ + def __init__(self, alpha=1, gamma=2, reduction='mean'): + super(FocalLoss, self).__init__() + self.alpha = alpha + self.gamma = gamma + self.reduction = reduction + + def forward(self, inputs, targets): + ce_loss = F.cross_entropy( \ No newline at end of file diff --git a/.history/src/utils_20250927100452.py b/.history/src/utils_20250927100452.py new file mode 100644 index 0000000000000000000000000000000000000000..5e4959032997dbedd7baa28df42b68d88c5f61d1 --- /dev/null +++ b/.history/src/utils_20250927100452.py @@ -0,0 +1,88 @@ +# src/utils.py + +import torch +import matplotlib.pyplot as plt +import os +import seaborn as sns +from sklearn.metrics import confusion_matrix +import torch.nn.functional as F + +# Atur style plot agar terlihat lebih bagus +plt.style.use('ggplot') +class FocalLoss(nn.Module): + def __init__(self, alpha=1, gamma=2, reduction='mean'): + super(FocalLoss, self).__init__() + self.alpha = alpha + self.gamma = gamma + self.reduction = reduction + + def forward(self, inputs, targets): + ce_loss = F.cross_entropy(inputs, targets, reduction='none') + pt = torch.exp(-ce_loss) + focal_loss = self.alpha * (1-pt)**self.gamma * ce_loss + if self.reduction == 'mean': + return focal_loss.mean() + elif self.reduction == 'sum': + return focal_loss.sum() + else: + return focal_loss + +def save_model(epochs, model, optimizer, criterion, model_path): + """ + Fungsi untuk menyimpan checkpoint model. + """ + print(f"Menyimpan model ke {model_path}") + os.makedirs(os.path.dirname(model_path), exist_ok=True) + torch.save({ + 'epoch': epochs, + 'model_state_dict': model.state_dict(), + 'optimizer_state_dict': optimizer.state_dict(), + 'loss': criterion, + }, model_path) + +def save_plots(train_acc, valid_acc, train_loss, valid_loss, plot_path): + """ + Fungsi untuk menyimpan plot akurasi dan loss selama training. 
+ """ + print(f"Menyimpan plot ke {plot_path}") + os.makedirs(os.path.dirname(plot_path), exist_ok=True) + + # Plot Akurasi + plt.figure(figsize=(10, 7)) + plt.plot(train_acc, color='green', linestyle='-', label='train accuracy') + plt.plot(valid_acc, color='blue', linestyle='-', label='validation accuracy') + plt.xlabel('Epochs') + plt.ylabel('Accuracy') + plt.legend() + plt.savefig(f"{plot_path}/accuracy.png") + + # Plot Loss + plt.figure(figsize=(10, 7)) + plt.plot(train_loss, color='orange', linestyle='-', label='train loss') + plt.plot(valid_loss, color='red', linestyle='-', label='validation loss') + plt.xlabel('Epochs') + plt.ylabel('Loss') + plt.legend() + plt.savefig(f"{plot_path}/loss.png") + +def save_confusion_matrix(y_true, y_pred, class_names, save_path): + """ + Menyimpan plot confusion matrix untuk analisis kesalahan model. + """ + cm = confusion_matrix(y_true, y_pred) + plt.figure(figsize=(10, 8)) + sns.heatmap( + cm, + annot=True, + fmt='d', + cmap='Blues', + xticklabels=class_names, + yticklabels=class_names + ) + plt.xlabel('Predicted Label') + plt.ylabel('True Label') + plt.title('Confusion Matrix of Best Validation Model') + plt.savefig(save_path) + print(f"Confusion matrix disimpan di {save_path}") + + \ No newline at end of file diff --git a/.history/src/utils_20250927100458.py b/.history/src/utils_20250927100458.py new file mode 100644 index 0000000000000000000000000000000000000000..31a4c593d974be2bcc341d85f4e2ea7fb7ed0e74 --- /dev/null +++ b/.history/src/utils_20250927100458.py @@ -0,0 +1,93 @@ +# src/utils.py + +import torch +import torch.nn as nn +import torch.nn.functional as F +import matplotlib.pyplot as plt +import os +import seaborn as sns +from sklearn.metrics import confusion_matrix + +# Atur style plot agar terlihat lebih bagus +plt.style.use('ggplot') + +def save_model(epochs, model, optimizer, criterion, model_path): + """ + Fungsi untuk menyimpan checkpoint model. 
+ """ + print(f"Menyimpan model ke {model_path}") + os.makedirs(os.path.dirname(model_path), exist_ok=True) + torch.save({ + 'epoch': epochs, + 'model_state_dict': model.state_dict(), + 'optimizer_state_dict': optimizer.state_dict(), + 'loss': criterion, + }, model_path) + +def save_plots(train_acc, valid_acc, train_loss, valid_loss, plot_path): + """ + Fungsi untuk menyimpan plot akurasi dan loss selama training. + """ + print(f"Menyimpan plot ke {plot_path}") + os.makedirs(os.path.dirname(plot_path), exist_ok=True) + + # Plot Akurasi + plt.figure(figsize=(10, 7)) + plt.plot(train_acc, color='green', linestyle='-', label='train accuracy') + plt.plot(valid_acc, color='blue', linestyle='-', label='validation accuracy') + plt.xlabel('Epochs') + plt.ylabel('Accuracy') + plt.legend() + plt.savefig(f"{plot_path}/accuracy.png") + + # Plot Loss + plt.figure(figsize=(10, 7)) + plt.plot(train_loss, color='orange', linestyle='-', label='train loss') + plt.plot(valid_loss, color='red', linestyle='-', label='validation loss') + plt.xlabel('Epochs') + plt.ylabel('Loss') + plt.legend() + plt.savefig(f"{plot_path}/loss.png") + +def save_confusion_matrix(y_true, y_pred, class_names, save_path): + """ + Menyimpan plot confusion matrix untuk analisis kesalahan model. + """ + cm = confusion_matrix(y_true, y_pred) + plt.figure(figsize=(10, 8)) + sns.heatmap( + cm, + annot=True, + fmt='d', + cmap='Blues', + xticklabels=class_names, + yticklabels=class_names + ) + plt.xlabel('Predicted Label') + plt.ylabel('True Label') + plt.title('Confusion Matrix of Best Validation Model') + plt.savefig(save_path) + print(f"Confusion matrix disimpan di {save_path}") + +class FocalLoss(nn.Module): + """ + Implementasi Focal Loss untuk menangani class imbalance + dan fokus pada sampel yang sulit. 
+ """ + def __init__(self, alpha=1, gamma=2, reduction='mean'): + super(FocalLoss, self).__init__() + self.alpha = alpha + self.gamma = gamma + self.reduction = reduction + + def forward(self, inputs, targets): + ce_loss = F.cross_entropy(inputs, targets, reduction='none') + pt = torch.exp(-ce_loss) + focal_loss = self.alpha * (1-pt)**self.gamma * ce_loss + + if self.reduction == 'mean': + return focal_loss.mean() + elif self.reduction == 'sum': + return focal_loss.sum() + else: + return focal_loss \ No newline at end of file diff --git a/.history/src/utils_20250927162245.py b/.history/src/utils_20250927162245.py new file mode 100644 index 0000000000000000000000000000000000000000..11786697998823ecb9c996f7d4ccc34c6ad03b47 --- /dev/null +++ b/.history/src/utils_20250927162245.py @@ -0,0 +1,89 @@ +# src/utils.py + +import torch +import torch.nn as nn +import torch.nn.functional as F +import matplotlib.pyplot as plt +import os +import seaborn as sns +from sklearn.metrics import confusion_matrix + +# Atur style plot agar terlihat lebih bagus +plt.style.use('ggplot') + +def save_model(epochs, model, optimizer, criterion, model_path): + """ + Fungsi untuk menyimpan checkpoint model. + """ + print(f"Menyimpan model ke {model_path}") + os.makedirs(os.path.dirname(model_path), exist_ok=True) + torch.save({ + 'epoch': epochs, + 'model_state_dict': model.state_dict(), + 'optimizer_state_dict': optimizer.state_dict(), + 'loss': criterion, + }, model_path) + +def save_plots(train_acc, valid_acc, train_loss, valid_loss, plot_path): + """ + Fungsi untuk menyimpan plot akurasi dan loss selama training. 
+ """ + print(f"Menyimpan plot ke {plot_path}") + os.makedirs(os.path.dirname(plot_path), exist_ok=True) + + # Plot Akurasi + plt.figure(figsize=(10, 7)) + plt.plot(train_acc, color='green', linestyle='-', label='train accuracy') + plt.plot(valid_acc, color='blue', linestyle='-', label='validation accuracy') + plt.xlabel('Epochs') + plt.ylabel('Accuracy') + plt.legend() + plt.savefig(f"{plot_path}/accuracy.png") + + # Plot Loss + plt.figure(figsize=(10, 7)) + plt.plot(train_loss, color='orange', linestyle='-', label='train loss') + plt.plot(valid_loss, color='red', linestyle='-', label='validation loss') + plt.xlabel('Epochs') + plt.ylabel('Loss') + plt.legend() + plt.savefig(f"{plot_path}/loss.png") + +def save_confusion_matrix(y_true, y_pred, class_names, save_path): + """ + Menyimpan plot confusion matrix untuk analisis kesalahan model. + """ + cm = confusion_matrix(y_true, y_pred) + plt.figure(figsize=(10, 8)) + sns.heatmap( + cm, + annot=True, + fmt='d', + cmap='Blues', + xticklabels=class_names, + yticklabels=class_names + ) + plt.xlabel('Predicted Label') + plt.ylabel('True Label') + plt.title('Confusion Matrix of Best Validation Model') + plt.savefig(save_path) + print(f"Confusion matrix disimpan di {save_path}") + +class FocalLoss(nn.Module): + def __init__(self, alpha=1, gamma=2, reduction="mean"): + super(FocalLoss, self).__init__() + self.alpha = alpha + self.gamma = gamma + self.reduction = reduction + + def forward(self, inputs, targets): + ce_loss = F.cross_entropy(inputs, targets, reduction="none") + pt = torch.exp(-ce_loss) # Probabilitas benar + focal_loss = self.alpha * (1 - pt) ** self.gamma * ce_loss + + if self.reduction == "mean": + return focal_loss.mean() + elif self.reduction == "sum": + return focal_loss.sum() + else: + return focal_loss \ No newline at end of file diff --git a/.history/src/utils_20250927162759.py b/.history/src/utils_20250927162759.py new file mode 100644 index 
0000000000000000000000000000000000000000..68f1f43e272670c239149b641eb4f9fcec3362cb --- /dev/null +++ b/.history/src/utils_20250927162759.py @@ -0,0 +1,97 @@ +# src/utils.py + +import torch +import torch.nn as nn +import torch.nn.functional as F +import matplotlib.pyplot as plt +import os +import seaborn as sns +from sklearn.metrics import confusion_matrix + +# Atur style plot agar terlihat lebih bagus +plt.style.use('ggplot') + +def save_model(epochs, model, optimizer, criterion, model_path): + """ + Fungsi untuk menyimpan checkpoint model. + """ + print(f"Menyimpan model ke {model_path}") + os.makedirs(os.path.dirname(model_path), exist_ok=True) + torch.save({ + 'epoch': epochs, + 'model_state_dict': model.state_dict(), + 'optimizer_state_dict': optimizer.state_dict(), + 'loss': criterion, + }, model_path) + +def save_plots(train_acc, valid_acc, train_loss, valid_loss, plot_path): + """ + Fungsi untuk menyimpan plot akurasi dan loss selama training. + """ + print(f"Menyimpan plot ke {plot_path}") + os.makedirs(os.path.dirname(plot_path), exist_ok=True) + + # Plot Akurasi + plt.figure(figsize=(10, 7)) + plt.plot(train_acc, color='green', linestyle='-', label='train accuracy') + plt.plot(valid_acc, color='blue', linestyle='-', label='validation accuracy') + plt.xlabel('Epochs') + plt.ylabel('Accuracy') + plt.legend() + plt.savefig(f"{plot_path}/accuracy.png") + + # Plot Loss + plt.figure(figsize=(10, 7)) + plt.plot(train_loss, color='orange', linestyle='-', label='train loss') + plt.plot(valid_loss, color='red', linestyle='-', label='validation loss') + plt.xlabel('Epochs') + plt.ylabel('Loss') + plt.legend() + plt.savefig(f"{plot_path}/loss.png") + +def save_confusion_matrix(y_true, y_pred, class_names, save_path): + """ + Menyimpan plot confusion matrix untuk analisis kesalahan model. 
+ """ + cm = confusion_matrix(y_true, y_pred) + plt.figure(figsize=(10, 8)) + sns.heatmap( + cm, + annot=True, + fmt='d', + cmap='Blues', + xticklabels=class_names, + yticklabels=class_names + ) + plt.xlabel('Predicted Label') + plt.ylabel('True Label') + plt.title('Confusion Matrix of Best Validation Model') + plt.savefig(save_path) + print(f"Confusion matrix disimpan di {save_path}") + +class FocalLoss(nn.Module): + def __init__(self, alpha=None, gamma=2, reduction="mean"): + super(FocalLoss, self).__init__() + if alpha is not None: + self.alpha = torch.tensor(alpha, dtype=torch.float32) + else: + self.alpha = None + self.gamma = gamma + self.reduction = reduction + + def forward(self, inputs, targets): + ce_loss = F.cross_entropy(inputs, targets, reduction="none") + pt = torch.exp(-ce_loss) + + if self.alpha is not None: + # ambil alpha sesuai kelas di targets + at = self.alpha.to(inputs.device)[targets] + focal_loss = at * (1 - pt) ** self.gamma * ce_loss + else: + focal_loss = (1 - pt) ** self.gamma * ce_loss + + if self.reduction == "mean": + return focal_loss.mean() + elif self.reduction == "sum": + return focal_loss.sum() + return focal_loss diff --git a/.history/src/utils_20250927164617.py b/.history/src/utils_20250927164617.py new file mode 100644 index 0000000000000000000000000000000000000000..549c086644595de8e2078a6d028b2a72a57cccfa --- /dev/null +++ b/.history/src/utils_20250927164617.py @@ -0,0 +1,89 @@ +# src/utils.py + +import torch +import torch.nn as nn +import torch.nn.functional as F +import matplotlib.pyplot as plt +import os +import seaborn as sns +from sklearn.metrics import confusion_matrix + +plt.style.use('ggplot') + +def save_model(epochs, model, optimizer, criterion, model_path): + """ + Fungsi untuk menyimpan checkpoint model. 
+ """ + print(f"Menyimpan model ke {model_path}") + os.makedirs(os.path.dirname(model_path), exist_ok=True) + torch.save({ + 'epoch': epochs, + 'model_state_dict': model.state_dict(), + 'optimizer_state_dict': optimizer.state_dict(), + 'loss': criterion, + }, model_path) + +def save_plots(train_acc, valid_acc, train_loss, valid_loss, plot_path): + """ + Fungsi untuk menyimpan plot akurasi dan loss. + """ + print(f"Menyimpan plot ke {plot_path}") + os.makedirs(os.path.dirname(plot_path), exist_ok=True) + + plt.figure(figsize=(10, 7)) + plt.plot(train_acc, color='green', linestyle='-', label='train accuracy') + plt.plot(valid_acc, color='blue', linestyle='-', label='validation accuracy') + plt.xlabel('Epochs') + plt.ylabel('Accuracy') + plt.legend() + plt.savefig(f"{plot_path}/accuracy.png") + + plt.figure(figsize=(10, 7)) + plt.plot(train_loss, color='orange', linestyle='-', label='train loss') + plt.plot(valid_loss, color='red', linestyle='-', label='validation loss') + plt.xlabel('Epochs') + plt.ylabel('Loss') + plt.legend() + plt.savefig(f"{plot_path}/loss.png") + +def save_confusion_matrix(y_true, y_pred, class_names, save_path): + """ + Menyimpan plot confusion matrix. + """ + cm = confusion_matrix(y_true, y_pred) + plt.figure(figsize=(10, 8)) + sns.heatmap( + cm, + annot=True, + fmt='d', + cmap='Blues', + xticklabels=class_names, + yticklabels=class_names + ) + plt.xlabel('Predicted Label') + plt.ylabel('True Label') + plt.title('Confusion Matrix of Best Validation Model') + plt.savefig(save_path) + print(f"Confusion matrix disimpan di {save_path}") + +class FocalLoss(nn.Module): + """ + Implementasi Focal Loss. 
+ """ + def __init__(self, alpha=1, gamma=2, reduction='mean'): + super(FocalLoss, self).__init__() + self.alpha = alpha + self.gamma = gamma + self.reduction = reduction + + def forward(self, inputs, targets): + ce_loss = F.cross_entropy(inputs, targets, reduction='none') + pt = torch.exp(-ce_loss) + focal_loss = self.alpha * (1-pt)**self.gamma * ce_loss + + if self.reduction == 'mean': + return focal_loss.mean() + elif self.reduction == 'sum': + return focal_loss.sum() + else: + return focal_loss \ No newline at end of file diff --git a/labels.csv b/labels.csv new file mode 100644 index 0000000000000000000000000000000000000000..22b631dc3eaa6b16e22cb51b58e9f881ce11c502 --- /dev/null +++ b/labels.csv @@ -0,0 +1 @@ +image,grade diff --git a/outputs/csv/labels-gagal.csv b/outputs/csv/labels-gagal.csv new file mode 100644 index 0000000000000000000000000000000000000000..b3a7e9701614807285cf3a9efcdabff73264ac72 --- /dev/null +++ b/outputs/csv/labels-gagal.csv @@ -0,0 +1,1321 @@ +image,grade +01\DER\IM000000.JPG,_LABEL_KOSONG_ +01\DER\IM000001.JPG,_LABEL_KOSONG_ +01\DER\IM000002.JPG,_LABEL_KOSONG_ +01\DER\IM000003.JPG,_LABEL_KOSONG_ +01\IZQ\IM000005.JPG,_LABEL_KOSONG_ +01\IZQ\IM000006.JPG,_LABEL_KOSONG_ +01\IZQ\IM000007.JPG,_LABEL_KOSONG_ +02\DER\IM000004.JPG,_LABEL_KOSONG_ +02\DER\IM000005.JPG,_LABEL_KOSONG_ +02\DER\IM000006.JPG,_LABEL_KOSONG_ +02\DER\IM000007.JPG,_LABEL_KOSONG_ +02\DER\IM000008.JPG,_LABEL_KOSONG_ +02\DER\IM000009.JPG,_LABEL_KOSONG_ +02\IZQ\IM000010.JPG,_LABEL_KOSONG_ +02\IZQ\IM000011.JPG,_LABEL_KOSONG_ +02\IZQ\IM000012.JPG,_LABEL_KOSONG_ +02\IZQ\IM000013.JPG,_LABEL_KOSONG_ +02\IZQ\IM000014.JPG,_LABEL_KOSONG_ +02\IZQ\IM000015.JPG,_LABEL_KOSONG_ +03\DER\IM000000.JPG,_LABEL_KOSONG_ +03\DER\IM000001.JPG,_LABEL_KOSONG_ +03\DER\IM000002.JPG,_LABEL_KOSONG_ +03\DER\IM000003.JPG,_LABEL_KOSONG_ +03\DER\IM000004.JPG,_LABEL_KOSONG_ +03\DER\IM000005.JPG,_LABEL_KOSONG_ +03\DER\IM000006.JPG,_LABEL_KOSONG_ +03\IZQ\IM000007.JPG,_LABEL_KOSONG_ 
+03\IZQ\IM000008.JPG,_LABEL_KOSONG_ +03\IZQ\IM000009.JPG,_LABEL_KOSONG_ +03\IZQ\IM000010.JPG,_LABEL_KOSONG_ +03\IZQ\IM000011.JPG,_LABEL_KOSONG_ +03\IZQ\IM000012.JPG,_LABEL_KOSONG_ +03\IZQ\IM000013.JPG,_LABEL_KOSONG_ +03\IZQ\IM000014.JPG,_LABEL_KOSONG_ +03\IZQ\IM000015.JPG,_LABEL_KOSONG_ +04\DER\IM000001.JPG,_LABEL_KOSONG_ +04\DER\IM000002.JPG,_LABEL_KOSONG_ +04\DER\IM000003.JPG,_LABEL_KOSONG_ +04\DER\IM000004.JPG,_LABEL_KOSONG_ +04\DER\IM000005.JPG,_LABEL_KOSONG_ +04\IZQ\IM000006.JPG,_LABEL_KOSONG_ +04\IZQ\IM000007.JPG,_LABEL_KOSONG_ +04\IZQ\IM000008.JPG,_LABEL_KOSONG_ +04\IZQ\IM000009.JPG,_LABEL_KOSONG_ +04\IZQ\IM000010.JPG,_LABEL_KOSONG_ +05\DER\IM000000.JPG,_LABEL_KOSONG_ +05\DER\IM000001.JPG,_LABEL_KOSONG_ +05\DER\IM000002.JPG,_LABEL_KOSONG_ +05\DER\IM000003.JPG,_LABEL_KOSONG_ +05\DER\IM000004.JPG,_LABEL_KOSONG_ +05\DER\IM000005.JPG,_LABEL_KOSONG_ +05\DER\IM000006.JPG,_LABEL_KOSONG_ +05\IZQ\IM000007.JPG,_LABEL_KOSONG_ +05\IZQ\IM000008.JPG,_LABEL_KOSONG_ +05\IZQ\IM000009.JPG,_LABEL_KOSONG_ +05\IZQ\IM000010.JPG,_LABEL_KOSONG_ +06\Der\IM000000.JPG,_LABEL_KOSONG_ +06\Der\IM000001.JPG,_LABEL_KOSONG_ +06\Der\IM000002.JPG,_LABEL_KOSONG_ +06\Der\IM000003.JPG,_LABEL_KOSONG_ +06\Der\IM000004.JPG,_LABEL_KOSONG_ +06\Der\IM000005.JPG,_LABEL_KOSONG_ +06\Izq\IM000006.JPG,_LABEL_KOSONG_ +06\Izq\IM000007.JPG,_LABEL_KOSONG_ +06\Izq\IM000008.JPG,_LABEL_KOSONG_ +06\Izq\IM000009.JPG,_LABEL_KOSONG_ +07\DER\IM000000.JPG,_LABEL_KOSONG_ +07\DER\IM000001.JPG,_LABEL_KOSONG_ +07\DER\IM000002.JPG,_LABEL_KOSONG_ +07\DER\IM000003.JPG,_LABEL_KOSONG_ +07\DER\IM000004.JPG,_LABEL_KOSONG_ +07\DER\IM000005.JPG,_LABEL_KOSONG_ +07\IZQ\IM000006.JPG,_LABEL_KOSONG_ +07\IZQ\IM000007.JPG,_LABEL_KOSONG_ +07\IZQ\IM000008.JPG,_LABEL_KOSONG_ +07\IZQ\IM000009.JPG,_LABEL_KOSONG_ +07\IZQ\IM000010.JPG,_LABEL_KOSONG_ +07\IZQ\IM000011.JPG,_LABEL_KOSONG_ +08\IZQ\IM000001.JPG,_LABEL_KOSONG_ +08\IZQ\IM000002.JPG,_LABEL_KOSONG_ +08\IZQ\IM000003.JPG,_LABEL_KOSONG_ +08\IZQ\IM000004.JPG,_LABEL_KOSONG_ 
+08\IZQ\IM000005.JPG,_LABEL_KOSONG_ +08\IZQ\IM000006.JPG,_LABEL_KOSONG_ +09\DER\IM000000.JPG,_LABEL_KOSONG_ +09\DER\IM000001.JPG,_LABEL_KOSONG_ +09\DER\IM000002.JPG,_LABEL_KOSONG_ +09\DER\IM000003.JPG,_LABEL_KOSONG_ +09\DER\IM000004.JPG,_LABEL_KOSONG_ +09\IZQ\IM000005.JPG,_LABEL_KOSONG_ +09\IZQ\IM000006.JPG,_LABEL_KOSONG_ +09\IZQ\IM000007.JPG,_LABEL_KOSONG_ +09\IZQ\IM000008.JPG,_LABEL_KOSONG_ +09\IZQ\IM000009.JPG,_LABEL_KOSONG_ +09\IZQ\IM000010.JPG,_LABEL_KOSONG_ +10\DER\IM000001.JPG,_LABEL_KOSONG_ +10\DER\IM000002.JPG,_LABEL_KOSONG_ +10\DER\IM000003.JPG,_LABEL_KOSONG_ +10\DER\IM000004.JPG,_LABEL_KOSONG_ +10\DER\IM000005.JPG,_LABEL_KOSONG_ +10\DER\IM000006.JPG,_LABEL_KOSONG_ +10\DER\IM000007.JPG,_LABEL_KOSONG_ +10\IZQ\IM000008.JPG,_LABEL_KOSONG_ +10\IZQ\IM000009.JPG,_LABEL_KOSONG_ +10\IZQ\IM000010.JPG,_LABEL_KOSONG_ +10\IZQ\IM000011.JPG,_LABEL_KOSONG_ +10\IZQ\IM000012.JPG,_LABEL_KOSONG_ +100\DER\IM000001.JPG,_LABEL_KOSONG_ +100\DER\IM000002.JPG,_LABEL_KOSONG_ +100\DER\IM000004.JPG,_LABEL_KOSONG_ +100\DER\IM000005.JPG,_LABEL_KOSONG_ +100\DER\IM000006.JPG,_LABEL_KOSONG_ +100\DER\IM000007.JPG,_LABEL_KOSONG_ +100\IZQ\IM000009.JPG,_LABEL_KOSONG_ +100\IZQ\IM000010.JPG,_LABEL_KOSONG_ +100\IZQ\IM000011.JPG,_LABEL_KOSONG_ +100\IZQ\IM000012.JPG,_LABEL_KOSONG_ +100\IZQ\IM000013.JPG,_LABEL_KOSONG_ +100\IZQ\IM000014.JPG,_LABEL_KOSONG_ +100\IZQ\IM000015.JPG,_LABEL_KOSONG_ +100\IZQ\IM000016.JPG,_LABEL_KOSONG_ +100\IZQ\IM000017.JPG,_LABEL_KOSONG_ +101\DER\IM000000.JPG,_LABEL_KOSONG_ +101\DER\IM000001.JPG,_LABEL_KOSONG_ +101\DER\IM000002.JPG,_LABEL_KOSONG_ +101\DER\IM000003.JPG,_LABEL_KOSONG_ +101\DER\IM000004.JPG,_LABEL_KOSONG_ +101\DER\IM000005.JPG,_LABEL_KOSONG_ +101\DER\IM000006.JPG,_LABEL_KOSONG_ +101\IZQ\IM000007.JPG,_LABEL_KOSONG_ +101\IZQ\IM000008.JPG,_LABEL_KOSONG_ +101\IZQ\IM000009.JPG,_LABEL_KOSONG_ +101\IZQ\IM000010.JPG,_LABEL_KOSONG_ +101\IZQ\IM000011.JPG,_LABEL_KOSONG_ +102\DER\IM000001.JPG,_LABEL_KOSONG_ +102\DER\IM000002.JPG,_LABEL_KOSONG_ 
+102\DER\IM000003.JPG,_LABEL_KOSONG_ +102\DER\IM000004.JPG,_LABEL_KOSONG_ +102\DER\IM000005.JPG,_LABEL_KOSONG_ +102\DER\IM000006.JPG,_LABEL_KOSONG_ +102\IZQ\IM000007.JPG,_LABEL_KOSONG_ +102\IZQ\IM000008.JPG,_LABEL_KOSONG_ +102\IZQ\IM000009.JPG,_LABEL_KOSONG_ +102\IZQ\IM000010.JPG,_LABEL_KOSONG_ +102\IZQ\IM000011.JPG,_LABEL_KOSONG_ +102\IZQ\IM000012.JPG,_LABEL_KOSONG_ +102\IZQ\IM000013.JPG,_LABEL_KOSONG_ +103\DER\IM000002.JPG,_LABEL_KOSONG_ +103\DER\IM000003.JPG,_LABEL_KOSONG_ +103\DER\IM000004.JPG,_LABEL_KOSONG_ +103\DER\IM000005.JPG,_LABEL_KOSONG_ +103\DER\IM000006.JPG,_LABEL_KOSONG_ +103\IZQ\IM000007.JPG,_LABEL_KOSONG_ +103\IZQ\IM000008.JPG,_LABEL_KOSONG_ +103\IZQ\IM000009.JPG,_LABEL_KOSONG_ +103\IZQ\IM000010.JPG,_LABEL_KOSONG_ +103\IZQ\IM000011.JPG,_LABEL_KOSONG_ +103\IZQ\IM000012.JPG,_LABEL_KOSONG_ +103\IZQ\IM000013.JPG,_LABEL_KOSONG_ +104\DER\IM000001.JPG,_LABEL_KOSONG_ +104\DER\IM000002.JPG,_LABEL_KOSONG_ +104\DER\IM000003.JPG,_LABEL_KOSONG_ +104\DER\IM000004.JPG,_LABEL_KOSONG_ +104\DER\IM000005.JPG,_LABEL_KOSONG_ +104\IZQ\IM000006.JPG,_LABEL_KOSONG_ +104\IZQ\IM000007.JPG,_LABEL_KOSONG_ +104\IZQ\IM000008.JPG,_LABEL_KOSONG_ +104\IZQ\IM000009.JPG,_LABEL_KOSONG_ +104\IZQ\IM000010.JPG,_LABEL_KOSONG_ +105\DER\IM000001.JPG,_LABEL_KOSONG_ +105\DER\IM000002.JPG,_LABEL_KOSONG_ +105\DER\IM000003.JPG,_LABEL_KOSONG_ +105\DER\IM000004.JPG,_LABEL_KOSONG_ +105\DER\IM000005.JPG,_LABEL_KOSONG_ +105\DER\IM000006.JPG,_LABEL_KOSONG_ +105\IZQ\IM000007.JPG,_LABEL_KOSONG_ +105\IZQ\IM000008.JPG,_LABEL_KOSONG_ +105\IZQ\IM000009.JPG,_LABEL_KOSONG_ +105\IZQ\IM000010.JPG,_LABEL_KOSONG_ +105\IZQ\IM000011.JPG,_LABEL_KOSONG_ +106\DER\IM000001.JPG,_LABEL_KOSONG_ +106\DER\IM000002.JPG,_LABEL_KOSONG_ +106\DER\IM000003.JPG,_LABEL_KOSONG_ +106\DER\IM000004.JPG,_LABEL_KOSONG_ +106\DER\IM000005.JPG,_LABEL_KOSONG_ +106\IZQ\IM000008.JPG,_LABEL_KOSONG_ +106\IZQ\IM000009.JPG,_LABEL_KOSONG_ +107\DER\IM000007.JPG,_LABEL_KOSONG_ +107\DER\IM000008.JPG,_LABEL_KOSONG_ +107\DER\IM000009.JPG,_LABEL_KOSONG_ 
+107\DER\IM000010.JPG,_LABEL_KOSONG_ +107\DER\IM000011.JPG,_LABEL_KOSONG_ +107\IZQ\IM000001.JPG,_LABEL_KOSONG_ +107\IZQ\IM000002.JPG,_LABEL_KOSONG_ +107\IZQ\IM000003.JPG,_LABEL_KOSONG_ +107\IZQ\IM000004.JPG,_LABEL_KOSONG_ +107\IZQ\IM000005.JPG,_LABEL_KOSONG_ +107\IZQ\IM000006.JPG,_LABEL_KOSONG_ +108\DER\IM000000.JPG,_LABEL_KOSONG_ +108\DER\IM000001.JPG,_LABEL_KOSONG_ +108\DER\IM000002.JPG,_LABEL_KOSONG_ +108\DER\IM000003.JPG,_LABEL_KOSONG_ +108\DER\IM000004.JPG,_LABEL_KOSONG_ +108\DER\IM000005.JPG,_LABEL_KOSONG_ +108\DER\IM000006.JPG,_LABEL_KOSONG_ +108\DER\IM000007.JPG,_LABEL_KOSONG_ +108\DER\IM000008.JPG,_LABEL_KOSONG_ +108\IZQ\IM000009.JPG,_LABEL_KOSONG_ +108\IZQ\IM000010.JPG,_LABEL_KOSONG_ +108\IZQ\IM000011.JPG,_LABEL_KOSONG_ +108\IZQ\IM000012.JPG,_LABEL_KOSONG_ +108\IZQ\IM000013.JPG,_LABEL_KOSONG_ +108\IZQ\IM000014.JPG,_LABEL_KOSONG_ +109\DER\IM000001.JPG,_LABEL_KOSONG_ +109\DER\IM000002.JPG,_LABEL_KOSONG_ +109\DER\IM000003.JPG,_LABEL_KOSONG_ +109\DER\IM000004.JPG,_LABEL_KOSONG_ +109\DER\IM000005.JPG,_LABEL_KOSONG_ +109\DER\IM000006.JPG,_LABEL_KOSONG_ +109\DER\IM000007.JPG,_LABEL_KOSONG_ +109\IZQ\IM000008.JPG,_LABEL_KOSONG_ +109\IZQ\IM000009.JPG,_LABEL_KOSONG_ +109\IZQ\IM000010.JPG,_LABEL_KOSONG_ +109\IZQ\IM000011.JPG,_LABEL_KOSONG_ +11\DER\IM000000.JPG,_LABEL_KOSONG_ +11\DER\IM000001.JPG,_LABEL_KOSONG_ +11\DER\IM000002.JPG,_LABEL_KOSONG_ +11\DER\IM000003.JPG,_LABEL_KOSONG_ +11\DER\IM000004.JPG,_LABEL_KOSONG_ +11\DER\IM000005.JPG,_LABEL_KOSONG_ +11\IZQ\IM000006.JPG,_LABEL_KOSONG_ +11\IZQ\IM000007.JPG,_LABEL_KOSONG_ +11\IZQ\IM000008.JPG,_LABEL_KOSONG_ +11\IZQ\IM000009.JPG,_LABEL_KOSONG_ +11\IZQ\IM000010.JPG,_LABEL_KOSONG_ +110\DER\IM000000.JPG,_LABEL_KOSONG_ +110\DER\IM000001.JPG,_LABEL_KOSONG_ +110\DER\IM000002.JPG,_LABEL_KOSONG_ +110\DER\IM000003.JPG,_LABEL_KOSONG_ +110\DER\IM000004.JPG,_LABEL_KOSONG_ +110\DER\IM000005.JPG,_LABEL_KOSONG_ +110\DER\IM000006.JPG,_LABEL_KOSONG_ +110\DER\IM000007.JPG,_LABEL_KOSONG_ +110\IZQ\IM000008.JPG,_LABEL_KOSONG_ 
+110\IZQ\IM000009.JPG,_LABEL_KOSONG_ +110\IZQ\IM000010.JPG,_LABEL_KOSONG_ +110\IZQ\IM000011.JPG,_LABEL_KOSONG_ +110\IZQ\IM000012.JPG,_LABEL_KOSONG_ +110\IZQ\IM000013.JPG,_LABEL_KOSONG_ +110\IZQ\IM000014.JPG,_LABEL_KOSONG_ +111\DER\IM000001.JPG,_LABEL_KOSONG_ +111\DER\IM000002.JPG,_LABEL_KOSONG_ +111\DER\IM000003.JPG,_LABEL_KOSONG_ +111\DER\IM000005.JPG,_LABEL_KOSONG_ +111\DER\IM000006.JPG,_LABEL_KOSONG_ +111\DER\IM000007.JPG,_LABEL_KOSONG_ +111\IZQ\IM000008.JPG,_LABEL_KOSONG_ +111\IZQ\IM000009.JPG,_LABEL_KOSONG_ +111\IZQ\IM000010.JPG,_LABEL_KOSONG_ +111\IZQ\IM000011.JPG,_LABEL_KOSONG_ +111\IZQ\IM000012.JPG,_LABEL_KOSONG_ +112\DER\IM000001.JPG,_LABEL_KOSONG_ +112\DER\IM000003.JPG,_LABEL_KOSONG_ +112\DER\IM000004.JPG,_LABEL_KOSONG_ +112\DER\IM000005.JPG,_LABEL_KOSONG_ +112\DER\IM000006.JPG,_LABEL_KOSONG_ +112\DER\IM000007.JPG,_LABEL_KOSONG_ +112\DER\IM000009.JPG,_LABEL_KOSONG_ +112\DER\IM000010.JPG,_LABEL_KOSONG_ +112\DER\IM000011.JPG,_LABEL_KOSONG_ +112\IZQ\IM000012.JPG,_LABEL_KOSONG_ +112\IZQ\IM000013.JPG,_LABEL_KOSONG_ +112\IZQ\IM000014.JPG,_LABEL_KOSONG_ +112\IZQ\IM000015.JPG,_LABEL_KOSONG_ +112\IZQ\IM000016.JPG,_LABEL_KOSONG_ +112\IZQ\IM000017.JPG,_LABEL_KOSONG_ +113\DER\IM000000.JPG,_LABEL_KOSONG_ +113\DER\IM000001.JPG,_LABEL_KOSONG_ +113\DER\IM000002.JPG,_LABEL_KOSONG_ +113\DER\IM000003.JPG,_LABEL_KOSONG_ +113\DER\IM000004.JPG,_LABEL_KOSONG_ +113\DER\IM000005.JPG,_LABEL_KOSONG_ +113\DER\IM000006.JPG,_LABEL_KOSONG_ +113\DER\IM000007.JPG,_LABEL_KOSONG_ +113\DER\IM000008.JPG,_LABEL_KOSONG_ +113\IZQ\IM000009.JPG,_LABEL_KOSONG_ +113\IZQ\IM000010.JPG,_LABEL_KOSONG_ +113\IZQ\IM000011.JPG,_LABEL_KOSONG_ +113\IZQ\IM000012.JPG,_LABEL_KOSONG_ +113\IZQ\IM000013.JPG,_LABEL_KOSONG_ +114\DER\IM000000.JPG,_LABEL_KOSONG_ +114\DER\IM000001.JPG,_LABEL_KOSONG_ +114\DER\IM000002.JPG,_LABEL_KOSONG_ +114\DER\IM000003.JPG,_LABEL_KOSONG_ +114\DER\IM000004.JPG,_LABEL_KOSONG_ +114\DER\IM000005.JPG,_LABEL_KOSONG_ +114\DER\IM000006.JPG,_LABEL_KOSONG_ +114\DER\IM000007.JPG,_LABEL_KOSONG_ 
+114\IZQ\IM000008.JPG,_LABEL_KOSONG_ +114\IZQ\IM000009.JPG,_LABEL_KOSONG_ +114\IZQ\IM000010.JPG,_LABEL_KOSONG_ +114\IZQ\IM000011.JPG,_LABEL_KOSONG_ +115\DER\IM000000.JPG,_LABEL_KOSONG_ +115\DER\IM000001.JPG,_LABEL_KOSONG_ +115\DER\IM000002.JPG,_LABEL_KOSONG_ +115\DER\IM000003.JPG,_LABEL_KOSONG_ +115\DER\IM000004.JPG,_LABEL_KOSONG_ +115\DER\IM000005.JPG,_LABEL_KOSONG_ +115\DER\IM000006.JPG,_LABEL_KOSONG_ +115\IZQ\IM000007.JPG,_LABEL_KOSONG_ +115\IZQ\IM000008.JPG,_LABEL_KOSONG_ +115\IZQ\IM000009.JPG,_LABEL_KOSONG_ +115\IZQ\IM000010.JPG,_LABEL_KOSONG_ +115\IZQ\IM000011.JPG,_LABEL_KOSONG_ +115\IZQ\IM000012.JPG,_LABEL_KOSONG_ +115\IZQ\IM000013.JPG,_LABEL_KOSONG_ +12\DER\IM000000.JPG,_LABEL_KOSONG_ +12\DER\IM000001.JPG,_LABEL_KOSONG_ +12\DER\IM000002.JPG,_LABEL_KOSONG_ +12\IZQ\IM000003.JPG,_LABEL_KOSONG_ +12\IZQ\IM000004.JPG,_LABEL_KOSONG_ +12\IZQ\IM000005.JPG,_LABEL_KOSONG_ +12\IZQ\IM000006.JPG,_LABEL_KOSONG_ +12\IZQ\IM000007.JPG,_LABEL_KOSONG_ +12\IZQ\IM000008.JPG,_LABEL_KOSONG_ +13\DER\IM000001.JPG,_LABEL_KOSONG_ +13\DER\IM000002.JPG,_LABEL_KOSONG_ +13\DER\IM000003.JPG,_LABEL_KOSONG_ +13\DER\IM000004.JPG,_LABEL_KOSONG_ +13\DER\IM000005.JPG,_LABEL_KOSONG_ +13\DER\IM000006.JPG,_LABEL_KOSONG_ +13\IZQ\IM000007.JPG,_LABEL_KOSONG_ +13\IZQ\IM000008.JPG,_LABEL_KOSONG_ +13\IZQ\IM000009.JPG,_LABEL_KOSONG_ +13\IZQ\IM000010.JPG,_LABEL_KOSONG_ +13\IZQ\IM000011.JPG,_LABEL_KOSONG_ +13\IZQ\IM000012.JPG,_LABEL_KOSONG_ +14\DER\IM000000.JPG,_LABEL_KOSONG_ +14\DER\IM000001.JPG,_LABEL_KOSONG_ +14\DER\IM000002.JPG,_LABEL_KOSONG_ +14\DER\IM000003.JPG,_LABEL_KOSONG_ +14\DER\IM000004.JPG,_LABEL_KOSONG_ +14\DER\IM000005.JPG,_LABEL_KOSONG_ +14\IZQ\IM000006.JPG,_LABEL_KOSONG_ +14\IZQ\IM000007.JPG,_LABEL_KOSONG_ +14\IZQ\IM000008.JPG,_LABEL_KOSONG_ +14\IZQ\IM000009.JPG,_LABEL_KOSONG_ +14\IZQ\IM000010.JPG,_LABEL_KOSONG_ +14\IZQ\IM000011.JPG,_LABEL_KOSONG_ +15\DER\IM000001.JPG,_LABEL_KOSONG_ +15\DER\IM000002.JPG,_LABEL_KOSONG_ +15\DER\IM000003.JPG,_LABEL_KOSONG_ +15\DER\IM000004.JPG,_LABEL_KOSONG_ 
+15\IZQ\IM000005.JPG,_LABEL_KOSONG_ +15\IZQ\IM000006.JPG,_LABEL_KOSONG_ +15\IZQ\IM000007.JPG,_LABEL_KOSONG_ +16\DER\IM000001.JPG,_LABEL_KOSONG_ +16\DER\IM000002.JPG,_LABEL_KOSONG_ +16\DER\IM000003.JPG,_LABEL_KOSONG_ +16\DER\IM000004.JPG,_LABEL_KOSONG_ +16\DER\IM000005.JPG,_LABEL_KOSONG_ +16\DER\IM000006.JPG,_LABEL_KOSONG_ +16\DER\IM000007.JPG,_LABEL_KOSONG_ +16\IZQ\IM000008.JPG,_LABEL_KOSONG_ +16\IZQ\IM000009.JPG,_LABEL_KOSONG_ +16\IZQ\IM000010.JPG,_LABEL_KOSONG_ +16\IZQ\IM000011.JPG,_LABEL_KOSONG_ +16\IZQ\IM000012.JPG,_LABEL_KOSONG_ +17\DER\IM000000.JPG,_LABEL_KOSONG_ +17\DER\IM000001.JPG,_LABEL_KOSONG_ +17\DER\IM000002.JPG,_LABEL_KOSONG_ +17\DER\IM000003.JPG,_LABEL_KOSONG_ +17\DER\IM000004.JPG,_LABEL_KOSONG_ +17\IZQ\IM000005.JPG,_LABEL_KOSONG_ +17\IZQ\IM000006.JPG,_LABEL_KOSONG_ +17\IZQ\IM000007.JPG,_LABEL_KOSONG_ +17\IZQ\IM000008.JPG,_LABEL_KOSONG_ +18\DER\IM000002.JPG,_LABEL_KOSONG_ +18\DER\IM000003.JPG,_LABEL_KOSONG_ +18\DER\IM000004.JPG,_LABEL_KOSONG_ +18\DER\IM000005.JPG,_LABEL_KOSONG_ +18\DER\IM000006.JPG,_LABEL_KOSONG_ +18\IZQ\IM000007.JPG,_LABEL_KOSONG_ +18\IZQ\IM000008.JPG,_LABEL_KOSONG_ +18\IZQ\IM000009.JPG,_LABEL_KOSONG_ +18\IZQ\IM000010.JPG,_LABEL_KOSONG_ +18\IZQ\IM000011.JPG,_LABEL_KOSONG_ +19\DER\IM000000.JPG,_LABEL_KOSONG_ +19\DER\IM000001.JPG,_LABEL_KOSONG_ +19\DER\IM000002.JPG,_LABEL_KOSONG_ +19\DER\IM000003.JPG,_LABEL_KOSONG_ +19\DER\IM000004.JPG,_LABEL_KOSONG_ +19\DER\IM000005.JPG,_LABEL_KOSONG_ +19\DER\IM000006.JPG,_LABEL_KOSONG_ +19\DER\IM000007.JPG,_LABEL_KOSONG_ +19\IZQ\IM000009.JPG,_LABEL_KOSONG_ +19\IZQ\IM000010.JPG,_LABEL_KOSONG_ +19\IZQ\IM000011.JPG,_LABEL_KOSONG_ +19\IZQ\IM000012.JPG,_LABEL_KOSONG_ +19\IZQ\IM000013.JPG,_LABEL_KOSONG_ +19\IZQ\IM000014.JPG,_LABEL_KOSONG_ +19\IZQ\IM000015.JPG,_LABEL_KOSONG_ +19\IZQ\IM000016.JPG,_LABEL_KOSONG_ +20\DER\IM000000.JPG,_LABEL_KOSONG_ +20\DER\IM000001.JPG,_LABEL_KOSONG_ +20\DER\IM000002.JPG,_LABEL_KOSONG_ +20\DER\IM000003.JPG,_LABEL_KOSONG_ +20\DER\IM000004.JPG,_LABEL_KOSONG_ 
+20\DER\IM000005.JPG,_LABEL_KOSONG_ +20\DER\IM000006.JPG,_LABEL_KOSONG_ +20\DER\IM000007.JPG,_LABEL_KOSONG_ +20\IZQ\IM000008.JPG,_LABEL_KOSONG_ +20\IZQ\IM000009.JPG,_LABEL_KOSONG_ +20\IZQ\IM000010.JPG,_LABEL_KOSONG_ +20\IZQ\IM000011.JPG,_LABEL_KOSONG_ +20\IZQ\IM000012.JPG,_LABEL_KOSONG_ +20\IZQ\IM000013.JPG,_LABEL_KOSONG_ +20\IZQ\IM000014.JPG,_LABEL_KOSONG_ +20\IZQ\IM000015.JPG,_LABEL_KOSONG_ +21\DER\IM000000.JPG,_LABEL_KOSONG_ +21\DER\IM000001.JPG,_LABEL_KOSONG_ +21\DER\IM000002.JPG,_LABEL_KOSONG_ +21\DER\IM000003.JPG,_LABEL_KOSONG_ +21\DER\IM000004.JPG,_LABEL_KOSONG_ +21\DER\IM000005.JPG,_LABEL_KOSONG_ +21\IZQ\IM000006.JPG,_LABEL_KOSONG_ +21\IZQ\IM000007.JPG,_LABEL_KOSONG_ +21\IZQ\IM000008.JPG,_LABEL_KOSONG_ +21\IZQ\IM000009.JPG,_LABEL_KOSONG_ +21\IZQ\IM000010.JPG,_LABEL_KOSONG_ +21\IZQ\IM000011.JPG,_LABEL_KOSONG_ +22\DER\IM000000.JPG,_LABEL_KOSONG_ +22\DER\IM000001.JPG,_LABEL_KOSONG_ +22\DER\IM000002.JPG,_LABEL_KOSONG_ +22\DER\IM000003.JPG,_LABEL_KOSONG_ +22\DER\IM000004.JPG,_LABEL_KOSONG_ +22\DER\IM000005.JPG,_LABEL_KOSONG_ +22\IZQ\IM000006.JPG,_LABEL_KOSONG_ +22\IZQ\IM000007.JPG,_LABEL_KOSONG_ +22\IZQ\IM000008.JPG,_LABEL_KOSONG_ +22\IZQ\IM000009.JPG,_LABEL_KOSONG_ +22\IZQ\IM000010.JPG,_LABEL_KOSONG_ +22\IZQ\IM000011.JPG,_LABEL_KOSONG_ +22\IZQ\IM000012.JPG,_LABEL_KOSONG_ +23\DER\IM000001.JPG,_LABEL_KOSONG_ +23\DER\IM000002.JPG,_LABEL_KOSONG_ +23\DER\IM000003.JPG,_LABEL_KOSONG_ +23\DER\IM000004.JPG,_LABEL_KOSONG_ +23\DER\IM000005.JPG,_LABEL_KOSONG_ +23\IZQ\IM000006.JPG,_LABEL_KOSONG_ +23\IZQ\IM000008.JPG,_LABEL_KOSONG_ +23\IZQ\IM000009.JPG,_LABEL_KOSONG_ +24\DER\IM000000.JPG,_LABEL_KOSONG_ +24\DER\IM000001.JPG,_LABEL_KOSONG_ +24\DER\IM000002.JPG,_LABEL_KOSONG_ +24\DER\IM000003.JPG,_LABEL_KOSONG_ +24\DER\IM000004.JPG,_LABEL_KOSONG_ +24\DER\IM000005.JPG,_LABEL_KOSONG_ +24\DER\IM000006.JPG,_LABEL_KOSONG_ +24\DER\IM000007.JPG,_LABEL_KOSONG_ +24\IZQ\IM000008.JPG,_LABEL_KOSONG_ +24\IZQ\IM000009.JPG,_LABEL_KOSONG_ +24\IZQ\IM000010.JPG,_LABEL_KOSONG_ 
+24\IZQ\IM000011.JPG,_LABEL_KOSONG_ +24\IZQ\IM000012.JPG,_LABEL_KOSONG_ +24\IZQ\IM000013.JPG,_LABEL_KOSONG_ +24\IZQ\IM000014.JPG,_LABEL_KOSONG_ +24\IZQ\IM000015.JPG,_LABEL_KOSONG_ +25\DER\IM000001.JPG,_LABEL_KOSONG_ +25\DER\IM000002.JPG,_LABEL_KOSONG_ +25\DER\IM000003.JPG,_LABEL_KOSONG_ +25\DER\IM000004.JPG,_LABEL_KOSONG_ +25\DER\IM000005.JPG,_LABEL_KOSONG_ +25\IZQ\IM000006.JPG,_LABEL_KOSONG_ +25\IZQ\IM000007.JPG,_LABEL_KOSONG_ +25\IZQ\IM000008.JPG,_LABEL_KOSONG_ +25\IZQ\IM000009.JPG,_LABEL_KOSONG_ +25\IZQ\IM000010.JPG,_LABEL_KOSONG_ +25\IZQ\IM000011.JPG,_LABEL_KOSONG_ +25\IZQ\IM000012.JPG,_LABEL_KOSONG_ +26\DER\IM000000.JPG,_LABEL_KOSONG_ +26\DER\IM000001.JPG,_LABEL_KOSONG_ +26\DER\IM000002.JPG,_LABEL_KOSONG_ +26\DER\IM000003.JPG,_LABEL_KOSONG_ +26\DER\IM000004.JPG,_LABEL_KOSONG_ +26\DER\IM000006.JPG,_LABEL_KOSONG_ +26\IZQ\IM000007.JPG,_LABEL_KOSONG_ +26\IZQ\IM000008.JPG,_LABEL_KOSONG_ +26\IZQ\IM000009.JPG,_LABEL_KOSONG_ +26\IZQ\IM000010.JPG,_LABEL_KOSONG_ +26\IZQ\IM000011.JPG,_LABEL_KOSONG_ +27\DER\IM000002.JPG,_LABEL_KOSONG_ +27\DER\IM000003.JPG,_LABEL_KOSONG_ +27\DER\IM000004.JPG,_LABEL_KOSONG_ +27\DER\IM000005.JPG,_LABEL_KOSONG_ +27\DER\IM000006.JPG,_LABEL_KOSONG_ +27\IZQ\IM000007.JPG,_LABEL_KOSONG_ +27\IZQ\IM000008.JPG,_LABEL_KOSONG_ +27\IZQ\IM000009.JPG,_LABEL_KOSONG_ +27\IZQ\IM000010.JPG,_LABEL_KOSONG_ +27\IZQ\IM000011.JPG,_LABEL_KOSONG_ +27\IZQ\IM000012.JPG,_LABEL_KOSONG_ +28\DER\IM000001.JPG,_LABEL_KOSONG_ +28\DER\IM000002.JPG,_LABEL_KOSONG_ +28\DER\IM000003.JPG,_LABEL_KOSONG_ +28\DER\IM000004.JPG,_LABEL_KOSONG_ +28\DER\IM000005.JPG,_LABEL_KOSONG_ +28\IZQ\IM000006.JPG,_LABEL_KOSONG_ +28\IZQ\IM000007.JPG,_LABEL_KOSONG_ +28\IZQ\IM000008.JPG,_LABEL_KOSONG_ +28\IZQ\IM000009.JPG,_LABEL_KOSONG_ +28\IZQ\IM000010.JPG,_LABEL_KOSONG_ +28\IZQ\IM000011.JPG,_LABEL_KOSONG_ +29\DER\IM000000.JPG,_LABEL_KOSONG_ +29\DER\IM000001.JPG,_LABEL_KOSONG_ +29\DER\IM000002.JPG,_LABEL_KOSONG_ +29\DER\IM000003.JPG,_LABEL_KOSONG_ +29\DER\IM000004.JPG,_LABEL_KOSONG_ 
+29\DER\IM000005.JPG,_LABEL_KOSONG_ +29\DER\IM000006.JPG,_LABEL_KOSONG_ +29\IZQ\IM000007.JPG,_LABEL_KOSONG_ +29\IZQ\IM000008.JPG,_LABEL_KOSONG_ +29\IZQ\IM000009.JPG,_LABEL_KOSONG_ +29\IZQ\IM000010.JPG,_LABEL_KOSONG_ +29\IZQ\IM000011.JPG,_LABEL_KOSONG_ +30\DER\IM000001.JPG,_LABEL_KOSONG_ +30\DER\IM000002.JPG,_LABEL_KOSONG_ +30\DER\IM000003.JPG,_LABEL_KOSONG_ +30\DER\IM000004.JPG,_LABEL_KOSONG_ +30\DER\IM000005.JPG,_LABEL_KOSONG_ +30\DER\IM000006.JPG,_LABEL_KOSONG_ +30\DER\IM000007.JPG,_LABEL_KOSONG_ +30\IZQ\IM000008.JPG,_LABEL_KOSONG_ +30\IZQ\IM000009.JPG,_LABEL_KOSONG_ +30\IZQ\IM000010.JPG,_LABEL_KOSONG_ +30\IZQ\IM000011.JPG,_LABEL_KOSONG_ +30\IZQ\IM000012.JPG,_LABEL_KOSONG_ +30\IZQ\IM000013.JPG,_LABEL_KOSONG_ +30\IZQ\IM000014.JPG,_LABEL_KOSONG_ +30\IZQ\IM000015.JPG,_LABEL_KOSONG_ +30\IZQ\IM000016.JPG,_LABEL_KOSONG_ +30\IZQ\IM000017.JPG,_LABEL_KOSONG_ +31\DER\IM000001.JPG,_LABEL_KOSONG_ +31\DER\IM000002.JPG,_LABEL_KOSONG_ +31\DER\IM000003.JPG,_LABEL_KOSONG_ +31\DER\IM000004.JPG,_LABEL_KOSONG_ +31\DER\IM000005.JPG,_LABEL_KOSONG_ +31\IZQ\IM000006.JPG,_LABEL_KOSONG_ +31\IZQ\IM000007.JPG,_LABEL_KOSONG_ +31\IZQ\IM000008.JPG,_LABEL_KOSONG_ +31\IZQ\IM000009.JPG,_LABEL_KOSONG_ +31\IZQ\IM000010.JPG,_LABEL_KOSONG_ +31\IZQ\IM000011.JPG,_LABEL_KOSONG_ +32\DER\IM000000.JPG,_LABEL_KOSONG_ +32\DER\IM000001.JPG,_LABEL_KOSONG_ +32\DER\IM000002.JPG,_LABEL_KOSONG_ +32\DER\IM000003.JPG,_LABEL_KOSONG_ +32\DER\IM000004.JPG,_LABEL_KOSONG_ +32\IZQ\IM000005.JPG,_LABEL_KOSONG_ +32\IZQ\IM000006.JPG,_LABEL_KOSONG_ +32\IZQ\IM000007.JPG,_LABEL_KOSONG_ +32\IZQ\IM000008.JPG,_LABEL_KOSONG_ +32\IZQ\IM000009.JPG,_LABEL_KOSONG_ +32\IZQ\IM000010.JPG,_LABEL_KOSONG_ +33\DER\IM000001.JPG,_LABEL_KOSONG_ +33\DER\IM000002.JPG,_LABEL_KOSONG_ +33\DER\IM000003.JPG,_LABEL_KOSONG_ +33\DER\IM000004.JPG,_LABEL_KOSONG_ +33\DER\IM000005.JPG,_LABEL_KOSONG_ +33\DER\IM000006.JPG,_LABEL_KOSONG_ +33\IZQ\IM000007.JPG,_LABEL_KOSONG_ +33\IZQ\IM000008.JPG,_LABEL_KOSONG_ +33\IZQ\IM000009.JPG,_LABEL_KOSONG_ 
+33\IZQ\IM000010.JPG,_LABEL_KOSONG_ +33\IZQ\IM000011.JPG,_LABEL_KOSONG_ +34\DER\IM000000.JPG,_LABEL_KOSONG_ +34\DER\IM000001.JPG,_LABEL_KOSONG_ +34\DER\IM000002.JPG,_LABEL_KOSONG_ +34\DER\IM000003.JPG,_LABEL_KOSONG_ +34\DER\IM000004.JPG,_LABEL_KOSONG_ +35\DER\IM000000.JPG,_LABEL_KOSONG_ +35\DER\IM000001.JPG,_LABEL_KOSONG_ +35\DER\IM000002.JPG,_LABEL_KOSONG_ +35\DER\IM000003.JPG,_LABEL_KOSONG_ +35\DER\IM000004.JPG,_LABEL_KOSONG_ +35\DER\IM000005.JPG,_LABEL_KOSONG_ +36\DER\IM000000.JPG,_LABEL_KOSONG_ +36\DER\IM000001.JPG,_LABEL_KOSONG_ +36\DER\IM000002.JPG,_LABEL_KOSONG_ +36\DER\IM000003.JPG,_LABEL_KOSONG_ +36\DER\IM000004.JPG,_LABEL_KOSONG_ +36\IZQ\IM000005.JPG,_LABEL_KOSONG_ +36\IZQ\IM000006.JPG,_LABEL_KOSONG_ +36\IZQ\IM000007.JPG,_LABEL_KOSONG_ +36\IZQ\IM000008.JPG,_LABEL_KOSONG_ +36\IZQ\IM000009.JPG,_LABEL_KOSONG_ +36\IZQ\IM000011.JPG,_LABEL_KOSONG_ +37\DER\IM000000.JPG,_LABEL_KOSONG_ +37\DER\IM000001.JPG,_LABEL_KOSONG_ +37\DER\IM000002.JPG,_LABEL_KOSONG_ +37\DER\IM000003.JPG,_LABEL_KOSONG_ +37\DER\IM000004.JPG,_LABEL_KOSONG_ +37\DER\IM000005.JPG,_LABEL_KOSONG_ +37\IZQ\IM000006.JPG,_LABEL_KOSONG_ +37\IZQ\IM000007.JPG,_LABEL_KOSONG_ +37\IZQ\IM000008.JPG,_LABEL_KOSONG_ +37\IZQ\IM000009.JPG,_LABEL_KOSONG_ +38\DER\IM000000.JPG,_LABEL_KOSONG_ +38\DER\IM000001.JPG,_LABEL_KOSONG_ +38\DER\IM000002.JPG,_LABEL_KOSONG_ +38\DER\IM000003.JPG,_LABEL_KOSONG_ +38\DER\IM000004.JPG,_LABEL_KOSONG_ +38\DER\IM000005.JPG,_LABEL_KOSONG_ +38\DER\IM000006.JPG,_LABEL_KOSONG_ +38\DER\IM000007.JPG,_LABEL_KOSONG_ +38\DER\IM000008.JPG,_LABEL_KOSONG_ +38\DER\IM000009.JPG,_LABEL_KOSONG_ +38\DER\IM000010.JPG,_LABEL_KOSONG_ +38\IZQ\IM000011.JPG,_LABEL_KOSONG_ +38\IZQ\IM000012.JPG,_LABEL_KOSONG_ +38\IZQ\IM000013.JPG,_LABEL_KOSONG_ +38\IZQ\IM000014.JPG,_LABEL_KOSONG_ +38\IZQ\IM000015.JPG,_LABEL_KOSONG_ +38\IZQ\IM000016.JPG,_LABEL_KOSONG_ +38\IZQ\IM000017.JPG,_LABEL_KOSONG_ +38\IZQ\IM000018.JPG,_LABEL_KOSONG_ +38\IZQ\IM000019.JPG,_LABEL_KOSONG_ +38\IZQ\IM000020.JPG,_LABEL_KOSONG_ 
+38\IZQ\IM000021.JPG,_LABEL_KOSONG_ +39\DER\IM000002.JPG,_LABEL_KOSONG_ +39\DER\IM000003.JPG,_LABEL_KOSONG_ +39\DER\IM000004.JPG,_LABEL_KOSONG_ +39\DER\IM000005.JPG,_LABEL_KOSONG_ +39\DER\IM000006.JPG,_LABEL_KOSONG_ +39\IZQ\IM000007.JPG,_LABEL_KOSONG_ +39\IZQ\IM000008.JPG,_LABEL_KOSONG_ +39\IZQ\IM000009.JPG,_LABEL_KOSONG_ +39\IZQ\IM000010.JPG,_LABEL_KOSONG_ +39\IZQ\IM000011.JPG,_LABEL_KOSONG_ +39\IZQ\IM000012.JPG,_LABEL_KOSONG_ +40\DER\IM000000.JPG,_LABEL_KOSONG_ +40\DER\IM000001.JPG,_LABEL_KOSONG_ +40\DER\IM000002.JPG,_LABEL_KOSONG_ +40\DER\IM000003.JPG,_LABEL_KOSONG_ +40\DER\IM000004.JPG,_LABEL_KOSONG_ +40\DER\IM000005.JPG,_LABEL_KOSONG_ +40\IZQ\IM000006.JPG,_LABEL_KOSONG_ +40\IZQ\IM000007.JPG,_LABEL_KOSONG_ +40\IZQ\IM000008.JPG,_LABEL_KOSONG_ +40\IZQ\IM000009.JPG,_LABEL_KOSONG_ +40\IZQ\IM000010.JPG,_LABEL_KOSONG_ +40\IZQ\IM000011.JPG,_LABEL_KOSONG_ +40\IZQ\IM000012.JPG,_LABEL_KOSONG_ +41\DER\IM000001.JPG,_LABEL_KOSONG_ +41\DER\IM000002.JPG,_LABEL_KOSONG_ +41\DER\IM000003.JPG,_LABEL_KOSONG_ +41\DER\IM000004.JPG,_LABEL_KOSONG_ +41\DER\IM000005.JPG,_LABEL_KOSONG_ +41\DER\IM000006.JPG,_LABEL_KOSONG_ +41\IZQ\IM000008.JPG,_LABEL_KOSONG_ +41\IZQ\IM000010.JPG,_LABEL_KOSONG_ +41\IZQ\IM000012.JPG,_LABEL_KOSONG_ +41\IZQ\IM000015.JPG,_LABEL_KOSONG_ +41\IZQ\IM000016.JPG,_LABEL_KOSONG_ +41\IZQ\IM000017.JPG,_LABEL_KOSONG_ +42\DER\IM000001.JPG,_LABEL_KOSONG_ +42\DER\IM000002.JPG,_LABEL_KOSONG_ +42\DER\IM000003.JPG,_LABEL_KOSONG_ +42\DER\IM000004.JPG,_LABEL_KOSONG_ +42\DER\IM000005.JPG,_LABEL_KOSONG_ +42\DER\IM000006.JPG,_LABEL_KOSONG_ +42\DER\IM000007.JPG,_LABEL_KOSONG_ +42\IZQ\IM000008.JPG,_LABEL_KOSONG_ +42\IZQ\IM000009.JPG,_LABEL_KOSONG_ +42\IZQ\IM000010.JPG,_LABEL_KOSONG_ +42\IZQ\IM000011.JPG,_LABEL_KOSONG_ +43\Izq\IM000000.JPG,_LABEL_KOSONG_ +43\Izq\IM000001.JPG,_LABEL_KOSONG_ +43\Izq\IM000002.JPG,_LABEL_KOSONG_ +43\Izq\IM000003.JPG,_LABEL_KOSONG_ +43\Izq\IM000004.JPG,_LABEL_KOSONG_ +43\Izq\IM000005.JPG,_LABEL_KOSONG_ +44\DER\IM000000.JPG,_LABEL_KOSONG_ 
+44\DER\IM000001.JPG,_LABEL_KOSONG_ +44\DER\IM000002.JPG,_LABEL_KOSONG_ +44\DER\IM000003.JPG,_LABEL_KOSONG_ +44\DER\IM000004.JPG,_LABEL_KOSONG_ +44\DER\IM000005.JPG,_LABEL_KOSONG_ +44\IZQ\IM000006.JPG,_LABEL_KOSONG_ +44\IZQ\IM000007.JPG,_LABEL_KOSONG_ +44\IZQ\IM000008.JPG,_LABEL_KOSONG_ +44\IZQ\IM000009.JPG,_LABEL_KOSONG_ +44\IZQ\IM000010.JPG,_LABEL_KOSONG_ +44\IZQ\IM000011.JPG,_LABEL_KOSONG_ +44\IZQ\IM000012.JPG,_LABEL_KOSONG_ +44\IZQ\IM000013.JPG,_LABEL_KOSONG_ +45\DER\IM000000.JPG,_LABEL_KOSONG_ +45\DER\IM000001.JPG,_LABEL_KOSONG_ +45\DER\IM000002.JPG,_LABEL_KOSONG_ +45\IZQ\IM000003.JPG,_LABEL_KOSONG_ +45\IZQ\IM000004.JPG,_LABEL_KOSONG_ +45\IZQ\IM000005.JPG,_LABEL_KOSONG_ +45\IZQ\IM000006.JPG,_LABEL_KOSONG_ +46\DER\IM000001.JPG,_LABEL_KOSONG_ +46\DER\IM000002.JPG,_LABEL_KOSONG_ +46\DER\IM000004.JPG,_LABEL_KOSONG_ +46\DER\IM000005.JPG,_LABEL_KOSONG_ +46\IZQ\IM000006.JPG,_LABEL_KOSONG_ +46\IZQ\IM000008.JPG,_LABEL_KOSONG_ +46\IZQ\IM000009.JPG,_LABEL_KOSONG_ +46\IZQ\IM000010.JPG,_LABEL_KOSONG_ +46\IZQ\IM000011.JPG,_LABEL_KOSONG_ +46\IZQ\IM000012.JPG,_LABEL_KOSONG_ +46\IZQ\IM000013.JPG,_LABEL_KOSONG_ +47\DER\IM000000.JPG,_LABEL_KOSONG_ +47\DER\IM000001.JPG,_LABEL_KOSONG_ +47\DER\IM000002.JPG,_LABEL_KOSONG_ +47\DER\IM000003.JPG,_LABEL_KOSONG_ +47\DER\IM000004.JPG,_LABEL_KOSONG_ +47\IZQ\IM000005.JPG,_LABEL_KOSONG_ +47\IZQ\IM000006.JPG,_LABEL_KOSONG_ +47\IZQ\IM000007.JPG,_LABEL_KOSONG_ +47\IZQ\IM000008.JPG,_LABEL_KOSONG_ +47\IZQ\IM000009.JPG,_LABEL_KOSONG_ +47\IZQ\IM000010.JPG,_LABEL_KOSONG_ +47\IZQ\IM000011.JPG,_LABEL_KOSONG_ +48\DER\IM000001.JPG,_LABEL_KOSONG_ +48\DER\IM000002.JPG,_LABEL_KOSONG_ +48\DER\IM000003.JPG,_LABEL_KOSONG_ +48\DER\IM000004.JPG,_LABEL_KOSONG_ +48\DER\IM000005.JPG,_LABEL_KOSONG_ +48\IZQ\IM000006.JPG,_LABEL_KOSONG_ +48\IZQ\IM000007.JPG,_LABEL_KOSONG_ +48\IZQ\IM000008.JPG,_LABEL_KOSONG_ +48\IZQ\IM000009.JPG,_LABEL_KOSONG_ +49\DER\IM000000.JPG,_LABEL_KOSONG_ +49\DER\IM000001.JPG,_LABEL_KOSONG_ +49\DER\IM000002.JPG,_LABEL_KOSONG_ 
+49\DER\IM000003.JPG,_LABEL_KOSONG_ +49\DER\IM000004.JPG,_LABEL_KOSONG_ +49\DER\IM000005.JPG,_LABEL_KOSONG_ +49\IZQ\IM000006.JPG,_LABEL_KOSONG_ +49\IZQ\IM000007.JPG,_LABEL_KOSONG_ +49\IZQ\IM000008.JPG,_LABEL_KOSONG_ +49\IZQ\IM000009.JPG,_LABEL_KOSONG_ +49\IZQ\IM000010.JPG,_LABEL_KOSONG_ +50\DER\IM000000.JPG,_LABEL_KOSONG_ +50\DER\IM000001.JPG,_LABEL_KOSONG_ +50\DER\IM000002.JPG,_LABEL_KOSONG_ +50\DER\IM000003.JPG,_LABEL_KOSONG_ +50\DER\IM000004.JPG,_LABEL_KOSONG_ +50\IZQ\IM000005.JPG,_LABEL_KOSONG_ +50\IZQ\IM000006.JPG,_LABEL_KOSONG_ +50\IZQ\IM000007.JPG,_LABEL_KOSONG_ +50\IZQ\IM000008.JPG,_LABEL_KOSONG_ +50\IZQ\IM000009.JPG,_LABEL_KOSONG_ +50\IZQ\IM000010.JPG,_LABEL_KOSONG_ +51\DER\IM000001.JPG,_LABEL_KOSONG_ +51\DER\IM000002.JPG,_LABEL_KOSONG_ +51\DER\IM000003.JPG,_LABEL_KOSONG_ +51\DER\IM000004.JPG,_LABEL_KOSONG_ +51\DER\IM000005.JPG,_LABEL_KOSONG_ +51\IZQ\IM000006.JPG,_LABEL_KOSONG_ +51\IZQ\IM000007.JPG,_LABEL_KOSONG_ +51\IZQ\IM000008.JPG,_LABEL_KOSONG_ +51\IZQ\IM000009.JPG,_LABEL_KOSONG_ +51\IZQ\IM000010.JPG,_LABEL_KOSONG_ +52\DER\IM000000.JPG,_LABEL_KOSONG_ +52\DER\IM000001.JPG,_LABEL_KOSONG_ +52\DER\IM000002.JPG,_LABEL_KOSONG_ +52\DER\IM000003.JPG,_LABEL_KOSONG_ +52\DER\IM000004.JPG,_LABEL_KOSONG_ +52\DER\IM000005.JPG,_LABEL_KOSONG_ +52\IZQ\IM000006.JPG,_LABEL_KOSONG_ +52\IZQ\IM000007.JPG,_LABEL_KOSONG_ +52\IZQ\IM000008.JPG,_LABEL_KOSONG_ +52\IZQ\IM000009.JPG,_LABEL_KOSONG_ +52\IZQ\IM000010.JPG,_LABEL_KOSONG_ +52\IZQ\IM000011.JPG,_LABEL_KOSONG_ +52\IZQ\IM000012.JPG,_LABEL_KOSONG_ +53\DER\IM000000.JPG,_LABEL_KOSONG_ +53\DER\IM000001.JPG,_LABEL_KOSONG_ +53\DER\IM000002.JPG,_LABEL_KOSONG_ +53\DER\IM000003.JPG,_LABEL_KOSONG_ +53\DER\IM000004.JPG,_LABEL_KOSONG_ +53\DER\IM000005.JPG,_LABEL_KOSONG_ +53\DER\IM000006.JPG,_LABEL_KOSONG_ +53\DER\IM000007.JPG,_LABEL_KOSONG_ +53\IZQ\IM000008.JPG,_LABEL_KOSONG_ +53\IZQ\IM000009.JPG,_LABEL_KOSONG_ +53\IZQ\IM000010.JPG,_LABEL_KOSONG_ +53\IZQ\IM000011.JPG,_LABEL_KOSONG_ +54\DER\IM000001.JPG,_LABEL_KOSONG_ 
+54\DER\IM000002.JPG,_LABEL_KOSONG_ +54\DER\IM000003.JPG,_LABEL_KOSONG_ +54\DER\IM000004.JPG,_LABEL_KOSONG_ +54\DER\IM000005.JPG,_LABEL_KOSONG_ +54\IZQ\IM000006.JPG,_LABEL_KOSONG_ +54\IZQ\IM000007.JPG,_LABEL_KOSONG_ +54\IZQ\IM000008.JPG,_LABEL_KOSONG_ +54\IZQ\IM000009.JPG,_LABEL_KOSONG_ +54\IZQ\IM000010.JPG,_LABEL_KOSONG_ +54\IZQ\IM000011.JPG,_LABEL_KOSONG_ +55\DER\IM000000.JPG,_LABEL_KOSONG_ +55\DER\IM000003.JPG,_LABEL_KOSONG_ +55\DER\IM000004.JPG,_LABEL_KOSONG_ +55\DER\IM000005.JPG,_LABEL_KOSONG_ +55\DER\IM000006.JPG,_LABEL_KOSONG_ +55\DER\IM000007.JPG,_LABEL_KOSONG_ +55\IZQ\IM000008.JPG,_LABEL_KOSONG_ +55\IZQ\IM000009.JPG,_LABEL_KOSONG_ +55\IZQ\IM000010.JPG,_LABEL_KOSONG_ +55\IZQ\IM000011.JPG,_LABEL_KOSONG_ +55\IZQ\IM000012.JPG,_LABEL_KOSONG_ +55\IZQ\IM000013.JPG,_LABEL_KOSONG_ +55\IZQ\IM000014.JPG,_LABEL_KOSONG_ +55\IZQ\IM000015.JPG,_LABEL_KOSONG_ +56\DER\IM000000.JPG,_LABEL_KOSONG_ +56\DER\IM000001.JPG,_LABEL_KOSONG_ +56\DER\IM000002.JPG,_LABEL_KOSONG_ +56\DER\IM000003.JPG,_LABEL_KOSONG_ +56\DER\IM000004.JPG,_LABEL_KOSONG_ +56\DER\IM000005.JPG,_LABEL_KOSONG_ +56\IZQ\IM000006.JPG,_LABEL_KOSONG_ +56\IZQ\IM000007.JPG,_LABEL_KOSONG_ +56\IZQ\IM000008.JPG,_LABEL_KOSONG_ +56\IZQ\IM000009.JPG,_LABEL_KOSONG_ +56\IZQ\IM000010.JPG,_LABEL_KOSONG_ +56\IZQ\IM000011.JPG,_LABEL_KOSONG_ +57\DER\IM000000.JPG,_LABEL_KOSONG_ +57\DER\IM000001.JPG,_LABEL_KOSONG_ +57\DER\IM000002.JPG,_LABEL_KOSONG_ +57\IZQ\IM000003.JPG,_LABEL_KOSONG_ +57\IZQ\IM000004.JPG,_LABEL_KOSONG_ +57\IZQ\IM000005.JPG,_LABEL_KOSONG_ +58\DER\IM000000.JPG,_LABEL_KOSONG_ +58\DER\IM000001.JPG,_LABEL_KOSONG_ +58\DER\IM000002.JPG,_LABEL_KOSONG_ +58\DER\IM000003.JPG,_LABEL_KOSONG_ +58\DER\IM000004.JPG,_LABEL_KOSONG_ +58\IZQ\IM000005.JPG,_LABEL_KOSONG_ +58\IZQ\IM000006.JPG,_LABEL_KOSONG_ +58\IZQ\IM000007.JPG,_LABEL_KOSONG_ +58\IZQ\IM000008.JPG,_LABEL_KOSONG_ +58\IZQ\IM000009.JPG,_LABEL_KOSONG_ +58\IZQ\IM000010.JPG,_LABEL_KOSONG_ +59\DER\IM000001.JPG,_LABEL_KOSONG_ +59\DER\IM000002.JPG,_LABEL_KOSONG_ 
+59\DER\IM000003.JPG,_LABEL_KOSONG_ +59\DER\IM000004.JPG,_LABEL_KOSONG_ +59\DER\IM000005.JPG,_LABEL_KOSONG_ +59\DER\IM000006.JPG,_LABEL_KOSONG_ +59\DER\IM000007.JPG,_LABEL_KOSONG_ +59\IZQ\IM000008.JPG,_LABEL_KOSONG_ +59\IZQ\IM000009.JPG,_LABEL_KOSONG_ +59\IZQ\IM000010.JPG,_LABEL_KOSONG_ +60\DER\IM000001.JPG,_LABEL_KOSONG_ +60\DER\IM000002.JPG,_LABEL_KOSONG_ +60\DER\IM000003.JPG,_LABEL_KOSONG_ +60\DER\IM000004.JPG,_LABEL_KOSONG_ +60\IZQ\IM000005.JPG,_LABEL_KOSONG_ +60\IZQ\IM000006.JPG,_LABEL_KOSONG_ +60\IZQ\IM000007.JPG,_LABEL_KOSONG_ +60\IZQ\IM000008.JPG,_LABEL_KOSONG_ +60\IZQ\IM000009.JPG,_LABEL_KOSONG_ +61\IZQ\IM000000.JPG,_LABEL_KOSONG_ +61\IZQ\IM000001.JPG,_LABEL_KOSONG_ +61\IZQ\IM000002.JPG,_LABEL_KOSONG_ +61\IZQ\IM000003.JPG,_LABEL_KOSONG_ +61\IZQ\IM000004.JPG,_LABEL_KOSONG_ +62\DER\IM000000.JPG,_LABEL_KOSONG_ +62\DER\IM000001.JPG,_LABEL_KOSONG_ +62\DER\IM000002.JPG,_LABEL_KOSONG_ +62\DER\IM000003.JPG,_LABEL_KOSONG_ +62\DER\IM000004.JPG,_LABEL_KOSONG_ +62\DER\IM000005.JPG,_LABEL_KOSONG_ +62\IZQ\IM000006.JPG,_LABEL_KOSONG_ +62\IZQ\IM000007.JPG,_LABEL_KOSONG_ +62\IZQ\IM000008.JPG,_LABEL_KOSONG_ +62\IZQ\IM000009.JPG,_LABEL_KOSONG_ +62\IZQ\IM000010.JPG,_LABEL_KOSONG_ +62\IZQ\IM000011.JPG,_LABEL_KOSONG_ +62\IZQ\IM000012.JPG,_LABEL_KOSONG_ +62\IZQ\IM000013.JPG,_LABEL_KOSONG_ +62\IZQ\IM000014.JPG,_LABEL_KOSONG_ +63\DER\IM000000.JPG,_LABEL_KOSONG_ +63\DER\IM000001.JPG,_LABEL_KOSONG_ +63\DER\IM000002.JPG,_LABEL_KOSONG_ +63\DER\IM000003.JPG,_LABEL_KOSONG_ +63\DER\IM000004.JPG,_LABEL_KOSONG_ +63\DER\IM000005.JPG,_LABEL_KOSONG_ +63\IZQ\IM000006.JPG,_LABEL_KOSONG_ +63\IZQ\IM000007.JPG,_LABEL_KOSONG_ +63\IZQ\IM000008.JPG,_LABEL_KOSONG_ +63\IZQ\IM000009.JPG,_LABEL_KOSONG_ +63\IZQ\IM000010.JPG,_LABEL_KOSONG_ +63\IZQ\IM000011.JPG,_LABEL_KOSONG_ +64\DER\IM000000.JPG,_LABEL_KOSONG_ +64\DER\IM000001.JPG,_LABEL_KOSONG_ +64\DER\IM000002.JPG,_LABEL_KOSONG_ +64\DER\IM000003.JPG,_LABEL_KOSONG_ +64\IZQ\IM000004.JPG,_LABEL_KOSONG_ +64\IZQ\IM000005.JPG,_LABEL_KOSONG_ 
+64\IZQ\IM000006.JPG,_LABEL_KOSONG_ +64\IZQ\IM000007.JPG,_LABEL_KOSONG_ +64\IZQ\IM000008.JPG,_LABEL_KOSONG_ +64\IZQ\IM000009.JPG,_LABEL_KOSONG_ +65\DER\IM000000.JPG,_LABEL_KOSONG_ +65\DER\IM000001.JPG,_LABEL_KOSONG_ +65\DER\IM000002.JPG,_LABEL_KOSONG_ +65\DER\IM000003.JPG,_LABEL_KOSONG_ +65\DER\IM000004.JPG,_LABEL_KOSONG_ +65\DER\IM000005.JPG,_LABEL_KOSONG_ +65\IZQ\IM000006.JPG,_LABEL_KOSONG_ +65\IZQ\IM000007.JPG,_LABEL_KOSONG_ +65\IZQ\IM000008.JPG,_LABEL_KOSONG_ +65\IZQ\IM000009.JPG,_LABEL_KOSONG_ +65\IZQ\IM000010.JPG,_LABEL_KOSONG_ +65\IZQ\IM000011.JPG,_LABEL_KOSONG_ +65\IZQ\IM000012.JPG,_LABEL_KOSONG_ +66\DER\IM000001.JPG,_LABEL_KOSONG_ +66\DER\IM000002.JPG,_LABEL_KOSONG_ +66\DER\IM000003.JPG,_LABEL_KOSONG_ +66\DER\IM000004.JPG,_LABEL_KOSONG_ +66\DER\IM000005.JPG,_LABEL_KOSONG_ +66\DER\IM000006.JPG,_LABEL_KOSONG_ +66\IZQ\IM000007.JPG,_LABEL_KOSONG_ +66\IZQ\IM000008.JPG,_LABEL_KOSONG_ +66\IZQ\IM000009.JPG,_LABEL_KOSONG_ +66\IZQ\IM000010.JPG,_LABEL_KOSONG_ +66\IZQ\IM000011.JPG,_LABEL_KOSONG_ +66\IZQ\IM000012.JPG,_LABEL_KOSONG_ +67\DER\IM000000.JPG,_LABEL_KOSONG_ +67\DER\IM000001.JPG,_LABEL_KOSONG_ +67\DER\IM000002.JPG,_LABEL_KOSONG_ +67\DER\IM000003.JPG,_LABEL_KOSONG_ +67\DER\IM000004.JPG,_LABEL_KOSONG_ +67\IZQ\IM000005.JPG,_LABEL_KOSONG_ +67\IZQ\IM000006.JPG,_LABEL_KOSONG_ +67\IZQ\IM000007.JPG,_LABEL_KOSONG_ +67\IZQ\IM000008.JPG,_LABEL_KOSONG_ +67\IZQ\IM000009.JPG,_LABEL_KOSONG_ +67\IZQ\IM000010.JPG,_LABEL_KOSONG_ +68\DER\IM000000.JPG,_LABEL_KOSONG_ +68\DER\IM000001.JPG,_LABEL_KOSONG_ +68\DER\IM000002.JPG,_LABEL_KOSONG_ +68\DER\IM000003.JPG,_LABEL_KOSONG_ +68\DER\IM000004.JPG,_LABEL_KOSONG_ +68\DER\IM000005.JPG,_LABEL_KOSONG_ +68\IZQ\IM000006.JPG,_LABEL_KOSONG_ +68\IZQ\IM000007.JPG,_LABEL_KOSONG_ +68\IZQ\IM000008.JPG,_LABEL_KOSONG_ +68\IZQ\IM000009.JPG,_LABEL_KOSONG_ +68\IZQ\IM000010.JPG,_LABEL_KOSONG_ +68\IZQ\IM000011.JPG,_LABEL_KOSONG_ +68\IZQ\IM000012.JPG,_LABEL_KOSONG_ +68\IZQ\IM000013.JPG,_LABEL_KOSONG_ +68\IZQ\IM000014.JPG,_LABEL_KOSONG_ 
+68\IZQ\IM000015.JPG,_LABEL_KOSONG_ +69\DER\IM000000.JPG,_LABEL_KOSONG_ +69\DER\IM000001.JPG,_LABEL_KOSONG_ +69\DER\IM000002.JPG,_LABEL_KOSONG_ +69\DER\IM000003.JPG,_LABEL_KOSONG_ +69\DER\IM000004.JPG,_LABEL_KOSONG_ +69\DER\IM000005.JPG,_LABEL_KOSONG_ +69\DER\IM000012.JPG,_LABEL_KOSONG_ +69\IZQ\IM000006.JPG,_LABEL_KOSONG_ +69\IZQ\IM000007.JPG,_LABEL_KOSONG_ +69\IZQ\IM000008.JPG,_LABEL_KOSONG_ +69\IZQ\IM000009.JPG,_LABEL_KOSONG_ +69\IZQ\IM000010.JPG,_LABEL_KOSONG_ +69\IZQ\IM000011.JPG,_LABEL_KOSONG_ +70\DER\IM000001.JPG,_LABEL_KOSONG_ +70\DER\IM000002.JPG,_LABEL_KOSONG_ +70\DER\IM000003.JPG,_LABEL_KOSONG_ +70\DER\IM000004.JPG,_LABEL_KOSONG_ +70\DER\IM000005.JPG,_LABEL_KOSONG_ +70\DER\IM000006.JPG,_LABEL_KOSONG_ +70\IZQ\IM000007.JPG,_LABEL_KOSONG_ +70\IZQ\IM000008.JPG,_LABEL_KOSONG_ +70\IZQ\IM000009.JPG,_LABEL_KOSONG_ +70\IZQ\IM000010.JPG,_LABEL_KOSONG_ +70\IZQ\IM000011.JPG,_LABEL_KOSONG_ +71\DER\IM000001.JPG,_LABEL_KOSONG_ +71\DER\IM000002.JPG,_LABEL_KOSONG_ +71\DER\IM000003.JPG,_LABEL_KOSONG_ +71\DER\IM000004.JPG,_LABEL_KOSONG_ +71\DER\IM000005.JPG,_LABEL_KOSONG_ +71\DER\IM000006.JPG,_LABEL_KOSONG_ +71\DER\IM000007.JPG,_LABEL_KOSONG_ +71\IZQ\IM000008.JPG,_LABEL_KOSONG_ +71\IZQ\IM000009.JPG,_LABEL_KOSONG_ +71\IZQ\IM000010.JPG,_LABEL_KOSONG_ +72\DER\IM000000.JPG,_LABEL_KOSONG_ +72\DER\IM000001.JPG,_LABEL_KOSONG_ +72\DER\IM000002.JPG,_LABEL_KOSONG_ +72\DER\IM000003.JPG,_LABEL_KOSONG_ +72\DER\IM000004.JPG,_LABEL_KOSONG_ +72\DER\IM000005.JPG,_LABEL_KOSONG_ +72\DER\IM000006.JPG,_LABEL_KOSONG_ +72\IZQ\IM000007.JPG,_LABEL_KOSONG_ +72\IZQ\IM000008.JPG,_LABEL_KOSONG_ +72\IZQ\IM000009.JPG,_LABEL_KOSONG_ +72\IZQ\IM000010.JPG,_LABEL_KOSONG_ +72\IZQ\IM000011.JPG,_LABEL_KOSONG_ +72\IZQ\IM000012.JPG,_LABEL_KOSONG_ +72\IZQ\IM000013.JPG,_LABEL_KOSONG_ +72\IZQ\IM000014.JPG,_LABEL_KOSONG_ +72\IZQ\IM000015.JPG,_LABEL_KOSONG_ +73\DER\IM000000.JPG,_LABEL_KOSONG_ +73\DER\IM000001.JPG,_LABEL_KOSONG_ +73\DER\IM000002.JPG,_LABEL_KOSONG_ +73\DER\IM000003.JPG,_LABEL_KOSONG_ 
+73\DER\IM000004.JPG,_LABEL_KOSONG_ +73\DER\IM000005.JPG,_LABEL_KOSONG_ +73\DER\IM000006.JPG,_LABEL_KOSONG_ +73\IZQ\IM000007.JPG,_LABEL_KOSONG_ +73\IZQ\IM000008.JPG,_LABEL_KOSONG_ +73\IZQ\IM000009.JPG,_LABEL_KOSONG_ +73\IZQ\IM000010.JPG,_LABEL_KOSONG_ +73\IZQ\IM000011.JPG,_LABEL_KOSONG_ +73\IZQ\IM000012.JPG,_LABEL_KOSONG_ +74\DER\IM000001.JPG,_LABEL_KOSONG_ +74\DER\IM000002.JPG,_LABEL_KOSONG_ +74\DER\IM000003.JPG,_LABEL_KOSONG_ +74\DER\IM000004.JPG,_LABEL_KOSONG_ +74\DER\IM000005.JPG,_LABEL_KOSONG_ +74\DER\IM000006.JPG,_LABEL_KOSONG_ +74\IZQ\IM000007.JPG,_LABEL_KOSONG_ +74\IZQ\IM000008.JPG,_LABEL_KOSONG_ +74\IZQ\IM000009.JPG,_LABEL_KOSONG_ +74\IZQ\IM000010.JPG,_LABEL_KOSONG_ +74\IZQ\IM000011.JPG,_LABEL_KOSONG_ +74\IZQ\IM000012.JPG,_LABEL_KOSONG_ +75\DER\IM000000.JPG,_LABEL_KOSONG_ +75\DER\IM000001.JPG,_LABEL_KOSONG_ +75\DER\IM000003.JPG,_LABEL_KOSONG_ +75\DER\IM000004.JPG,_LABEL_KOSONG_ +75\DER\IM000005.JPG,_LABEL_KOSONG_ +75\DER\IM000006.JPG,_LABEL_KOSONG_ +75\IZQ\IM000007.JPG,_LABEL_KOSONG_ +75\IZQ\IM000008.JPG,_LABEL_KOSONG_ +75\IZQ\IM000009.JPG,_LABEL_KOSONG_ +75\IZQ\IM000010.JPG,_LABEL_KOSONG_ +76\DER\IM000000.JPG,_LABEL_KOSONG_ +76\DER\IM000001.JPG,_LABEL_KOSONG_ +76\DER\IM000002.JPG,_LABEL_KOSONG_ +76\DER\IM000003.JPG,_LABEL_KOSONG_ +76\IZQ\IM000004.JPG,_LABEL_KOSONG_ +76\IZQ\IM000005.JPG,_LABEL_KOSONG_ +76\IZQ\IM000006.JPG,_LABEL_KOSONG_ +76\IZQ\IM000007.JPG,_LABEL_KOSONG_ +76\IZQ\IM000008.JPG,_LABEL_KOSONG_ +76\IZQ\IM000009.JPG,_LABEL_KOSONG_ +76\IZQ\IM000010.JPG,_LABEL_KOSONG_ +77\DER\IM000000.JPG,_LABEL_KOSONG_ +77\DER\IM000001.JPG,_LABEL_KOSONG_ +77\DER\IM000002.JPG,_LABEL_KOSONG_ +77\DER\IM000003.JPG,_LABEL_KOSONG_ +77\DER\IM000004.JPG,_LABEL_KOSONG_ +77\DER\IM000005.JPG,_LABEL_KOSONG_ +77\DER\IM000006.JPG,_LABEL_KOSONG_ +77\IZQ\IM000007.JPG,_LABEL_KOSONG_ +77\IZQ\IM000008.JPG,_LABEL_KOSONG_ +77\IZQ\IM000009.JPG,_LABEL_KOSONG_ +77\IZQ\IM000010.JPG,_LABEL_KOSONG_ +77\IZQ\IM000011.JPG,_LABEL_KOSONG_ +77\IZQ\IM000012.JPG,_LABEL_KOSONG_ 
+78\DER\IM000000.JPG,_LABEL_KOSONG_ +78\DER\IM000001.JPG,_LABEL_KOSONG_ +78\DER\IM000002.JPG,_LABEL_KOSONG_ +78\DER\IM000003.JPG,_LABEL_KOSONG_ +78\DER\IM000004.JPG,_LABEL_KOSONG_ +78\DER\IM000005.JPG,_LABEL_KOSONG_ +79\DER\IM000001.JPG,_LABEL_KOSONG_ +79\DER\IM000002.JPG,_LABEL_KOSONG_ +79\DER\IM000003.JPG,_LABEL_KOSONG_ +79\DER\IM000004.JPG,_LABEL_KOSONG_ +79\DER\IM000005.JPG,_LABEL_KOSONG_ +79\DER\IM000006.JPG,_LABEL_KOSONG_ +79\DER\IM000007.JPG,_LABEL_KOSONG_ +79\IZQ\IM000008.JPG,_LABEL_KOSONG_ +79\IZQ\IM000009.JPG,_LABEL_KOSONG_ +79\IZQ\IM000010.JPG,_LABEL_KOSONG_ +79\IZQ\IM000011.JPG,_LABEL_KOSONG_ +79\IZQ\IM000012.JPG,_LABEL_KOSONG_ +79\IZQ\IM000013.JPG,_LABEL_KOSONG_ +80\DER\IM000001.JPG,_LABEL_KOSONG_ +80\DER\IM000002.JPG,_LABEL_KOSONG_ +80\DER\IM000003.JPG,_LABEL_KOSONG_ +80\DER\IM000004.JPG,_LABEL_KOSONG_ +80\DER\IM000005.JPG,_LABEL_KOSONG_ +80\DER\IM000006.JPG,_LABEL_KOSONG_ +80\IZQ\IM000007.JPG,_LABEL_KOSONG_ +80\IZQ\IM000008.JPG,_LABEL_KOSONG_ +80\IZQ\IM000009.JPG,_LABEL_KOSONG_ +80\IZQ\IM000010.JPG,_LABEL_KOSONG_ +80\IZQ\IM000011.JPG,_LABEL_KOSONG_ +81\DER\IM000000.JPG,_LABEL_KOSONG_ +81\DER\IM000001.JPG,_LABEL_KOSONG_ +81\DER\IM000002.JPG,_LABEL_KOSONG_ +81\DER\IM000003.JPG,_LABEL_KOSONG_ +81\DER\IM000004.JPG,_LABEL_KOSONG_ +81\DER\IM000005.JPG,_LABEL_KOSONG_ +81\DER\IM000006.JPG,_LABEL_KOSONG_ +81\IZQ\IM000007.JPG,_LABEL_KOSONG_ +81\IZQ\IM000008.JPG,_LABEL_KOSONG_ +82\DER\IM000000.JPG,_LABEL_KOSONG_ +82\DER\IM000001.JPG,_LABEL_KOSONG_ +82\DER\IM000002.JPG,_LABEL_KOSONG_ +82\DER\IM000003.JPG,_LABEL_KOSONG_ +82\DER\IM000004.JPG,_LABEL_KOSONG_ +82\DER\IM000005.JPG,_LABEL_KOSONG_ +82\IZQ\IM000006.JPG,_LABEL_KOSONG_ +82\IZQ\IM000007.JPG,_LABEL_KOSONG_ +82\IZQ\IM000008.JPG,_LABEL_KOSONG_ +82\IZQ\IM000009.JPG,_LABEL_KOSONG_ +82\IZQ\IM000010.JPG,_LABEL_KOSONG_ +82\IZQ\IM000011.JPG,_LABEL_KOSONG_ +83\DER\IM000000.JPG,_LABEL_KOSONG_ +83\DER\IM000001.JPG,_LABEL_KOSONG_ +83\DER\IM000002.JPG,_LABEL_KOSONG_ +83\DER\IM000003.JPG,_LABEL_KOSONG_ 
+83\DER\IM000004.JPG,_LABEL_KOSONG_ +83\DER\IM000005.JPG,_LABEL_KOSONG_ +83\DER\IM000006.JPG,_LABEL_KOSONG_ +83\DER\IM000007.JPG,_LABEL_KOSONG_ +83\IZQ\IM000008.JPG,_LABEL_KOSONG_ +83\IZQ\IM000009.JPG,_LABEL_KOSONG_ +83\IZQ\IM000010.JPG,_LABEL_KOSONG_ +83\IZQ\IM000011.JPG,_LABEL_KOSONG_ +83\IZQ\IM000012.JPG,_LABEL_KOSONG_ +84\DER\IM000000.JPG,_LABEL_KOSONG_ +84\DER\IM000001.JPG,_LABEL_KOSONG_ +84\DER\IM000002.JPG,_LABEL_KOSONG_ +84\DER\IM000003.JPG,_LABEL_KOSONG_ +84\DER\IM000004.JPG,_LABEL_KOSONG_ +84\DER\IM000005.JPG,_LABEL_KOSONG_ +84\DER\IM000006.JPG,_LABEL_KOSONG_ +84\DER\IM000007.JPG,_LABEL_KOSONG_ +84\DER\IM000008.JPG,_LABEL_KOSONG_ +84\DER\IM000009.JPG,_LABEL_KOSONG_ +85\DER\IM000001.JPG,_LABEL_KOSONG_ +85\DER\IM000002.JPG,_LABEL_KOSONG_ +85\DER\IM000003.JPG,_LABEL_KOSONG_ +85\DER\IM000004.JPG,_LABEL_KOSONG_ +85\DER\IM000005.JPG,_LABEL_KOSONG_ +85\DER\IM000006.JPG,_LABEL_KOSONG_ +85\IZQ\IM000007.JPG,_LABEL_KOSONG_ +85\IZQ\IM000008.JPG,_LABEL_KOSONG_ +85\IZQ\IM000011.JPG,_LABEL_KOSONG_ +86\DER\IM000000.JPG,_LABEL_KOSONG_ +86\DER\IM000001.JPG,_LABEL_KOSONG_ +86\DER\IM000002.JPG,_LABEL_KOSONG_ +86\DER\IM000003.JPG,_LABEL_KOSONG_ +86\DER\IM000004.JPG,_LABEL_KOSONG_ +86\IZQ\IM000005.JPG,_LABEL_KOSONG_ +86\IZQ\IM000006.JPG,_LABEL_KOSONG_ +86\IZQ\IM000007.JPG,_LABEL_KOSONG_ +86\IZQ\IM000008.JPG,_LABEL_KOSONG_ +86\IZQ\IM000009.JPG,_LABEL_KOSONG_ +86\IZQ\IM000010.JPG,_LABEL_KOSONG_ +86\IZQ\IM000011.JPG,_LABEL_KOSONG_ +87\DER\IM000000.JPG,_LABEL_KOSONG_ +87\DER\IM000005.JPG,_LABEL_KOSONG_ +87\DER\IM000006.JPG,_LABEL_KOSONG_ +87\DER\IM000007.JPG,_LABEL_KOSONG_ +87\IZQ\IM000008.JPG,_LABEL_KOSONG_ +87\IZQ\IM000009.JPG,_LABEL_KOSONG_ +87\IZQ\IM000010.JPG,_LABEL_KOSONG_ +87\IZQ\IM000011.JPG,_LABEL_KOSONG_ +87\IZQ\IM000013.JPG,_LABEL_KOSONG_ +88\DER\IM000000.JPG,_LABEL_KOSONG_ +88\DER\IM000001.JPG,_LABEL_KOSONG_ +88\DER\IM000002.JPG,_LABEL_KOSONG_ +88\DER\IM000003.JPG,_LABEL_KOSONG_ +88\DER\IM000004.JPG,_LABEL_KOSONG_ +88\DER\IM000005.JPG,_LABEL_KOSONG_ 
+88\DER\IM000006.JPG,_LABEL_KOSONG_ +88\IZQ\IM000007.JPG,_LABEL_KOSONG_ +88\IZQ\IM000008.JPG,_LABEL_KOSONG_ +88\IZQ\IM000009.JPG,_LABEL_KOSONG_ +88\IZQ\IM000010.JPG,_LABEL_KOSONG_ +88\IZQ\IM000011.JPG,_LABEL_KOSONG_ +88\IZQ\IM000012.JPG,_LABEL_KOSONG_ +89\DER\IM000001.JPG,_LABEL_KOSONG_ +89\DER\IM000002.JPG,_LABEL_KOSONG_ +89\DER\IM000003.JPG,_LABEL_KOSONG_ +89\DER\IM000004.JPG,_LABEL_KOSONG_ +89\DER\IM000005.JPG,_LABEL_KOSONG_ +89\DER\IM000006.JPG,_LABEL_KOSONG_ +89\IZQ\IM000007.JPG,_LABEL_KOSONG_ +89\IZQ\IM000008.JPG,_LABEL_KOSONG_ +89\IZQ\IM000009.JPG,_LABEL_KOSONG_ +89\IZQ\IM000010.JPG,_LABEL_KOSONG_ +90\DER\IM000000.JPG,_LABEL_KOSONG_ +90\DER\IM000001.JPG,_LABEL_KOSONG_ +90\DER\IM000002.JPG,_LABEL_KOSONG_ +90\DER\IM000003.JPG,_LABEL_KOSONG_ +90\DER\IM000004.JPG,_LABEL_KOSONG_ +90\DER\IM000005.JPG,_LABEL_KOSONG_ +90\DER\IM000006.JPG,_LABEL_KOSONG_ +90\IZQ\IM000007.JPG,_LABEL_KOSONG_ +90\IZQ\IM000008.JPG,_LABEL_KOSONG_ +90\IZQ\IM000009.JPG,_LABEL_KOSONG_ +90\IZQ\IM000010.JPG,_LABEL_KOSONG_ +90\IZQ\IM000011.JPG,_LABEL_KOSONG_ +91\DER\IM000002.JPG,_LABEL_KOSONG_ +91\DER\IM000003.JPG,_LABEL_KOSONG_ +91\DER\IM000005.JPG,_LABEL_KOSONG_ +91\DER\IM000006.JPG,_LABEL_KOSONG_ +91\IZQ\IM000007.JPG,_LABEL_KOSONG_ +91\IZQ\IM000008.JPG,_LABEL_KOSONG_ +91\IZQ\IM000009.JPG,_LABEL_KOSONG_ +91\IZQ\IM000010.JPG,_LABEL_KOSONG_ +91\IZQ\IM000011.JPG,_LABEL_KOSONG_ +91\IZQ\IM000012.JPG,_LABEL_KOSONG_ +91\IZQ\IM000013.JPG,_LABEL_KOSONG_ +91\IZQ\IM000014.JPG,_LABEL_KOSONG_ +92\DER\IM000001.JPG,_LABEL_KOSONG_ +92\DER\IM000002.JPG,_LABEL_KOSONG_ +92\DER\IM000003.JPG,_LABEL_KOSONG_ +92\DER\IM000004.JPG,_LABEL_KOSONG_ +92\DER\IM000005.JPG,_LABEL_KOSONG_ +92\IZQ\IM000006.JPG,_LABEL_KOSONG_ +92\IZQ\IM000007.JPG,_LABEL_KOSONG_ +92\IZQ\IM000008.JPG,_LABEL_KOSONG_ +92\IZQ\IM000009.JPG,_LABEL_KOSONG_ +92\IZQ\IM000010.JPG,_LABEL_KOSONG_ +92\IZQ\IM000011.JPG,_LABEL_KOSONG_ +92\IZQ\IM000012.JPG,_LABEL_KOSONG_ +92\IZQ\IM000013.JPG,_LABEL_KOSONG_ +93\DER\IM000001.JPG,_LABEL_KOSONG_ 
+93\DER\IM000002.JPG,_LABEL_KOSONG_ +93\DER\IM000003.JPG,_LABEL_KOSONG_ +93\DER\IM000004.JPG,_LABEL_KOSONG_ +93\DER\IM000005.JPG,_LABEL_KOSONG_ +93\DER\IM000006.JPG,_LABEL_KOSONG_ +93\DER\IM000007.JPG,_LABEL_KOSONG_ +93\IZQ\IM000008.JPG,_LABEL_KOSONG_ +93\IZQ\IM000009.JPG,_LABEL_KOSONG_ +93\IZQ\IM000010.JPG,_LABEL_KOSONG_ +93\IZQ\IM000011.JPG,_LABEL_KOSONG_ +93\IZQ\IM000012.JPG,_LABEL_KOSONG_ +93\IZQ\IM000013.JPG,_LABEL_KOSONG_ +94\DER\IM000000.JPG,_LABEL_KOSONG_ +94\DER\IM000001.JPG,_LABEL_KOSONG_ +94\DER\IM000002.JPG,_LABEL_KOSONG_ +94\DER\IM000003.JPG,_LABEL_KOSONG_ +94\DER\IM000004.JPG,_LABEL_KOSONG_ +94\DER\IM000005.JPG,_LABEL_KOSONG_ +94\IZQ\IM000006.JPG,_LABEL_KOSONG_ +94\IZQ\IM000007.JPG,_LABEL_KOSONG_ +94\IZQ\IM000008.JPG,_LABEL_KOSONG_ +94\IZQ\IM000009.JPG,_LABEL_KOSONG_ +94\IZQ\IM000010.JPG,_LABEL_KOSONG_ +95\DER\IM000000.JPG,_LABEL_KOSONG_ +95\DER\IM000001.JPG,_LABEL_KOSONG_ +95\DER\IM000002.JPG,_LABEL_KOSONG_ +95\DER\IM000003.JPG,_LABEL_KOSONG_ +95\DER\IM000004.JPG,_LABEL_KOSONG_ +95\DER\IM000005.JPG,_LABEL_KOSONG_ +95\IZQ\IM000006.JPG,_LABEL_KOSONG_ +95\IZQ\IM000007.JPG,_LABEL_KOSONG_ +95\IZQ\IM000008.JPG,_LABEL_KOSONG_ +95\IZQ\IM000009.JPG,_LABEL_KOSONG_ +95\IZQ\IM000010.JPG,_LABEL_KOSONG_ +96\DER\IM000001.JPG,_LABEL_KOSONG_ +96\DER\IM000002.JPG,_LABEL_KOSONG_ +96\DER\IM000003.JPG,_LABEL_KOSONG_ +96\DER\IM000004.JPG,_LABEL_KOSONG_ +96\DER\IM000005.JPG,_LABEL_KOSONG_ +96\DER\IM000006.JPG,_LABEL_KOSONG_ +96\DER\IM000007.JPG,_LABEL_KOSONG_ +96\IZQ\IM000008.JPG,_LABEL_KOSONG_ +96\IZQ\IM000009.JPG,_LABEL_KOSONG_ +96\IZQ\IM000010.JPG,_LABEL_KOSONG_ +96\IZQ\IM000011.JPG,_LABEL_KOSONG_ +96\IZQ\IM000012.JPG,_LABEL_KOSONG_ +96\IZQ\IM000013.JPG,_LABEL_KOSONG_ +97\DER\IM000000.JPG,_LABEL_KOSONG_ +97\DER\IM000001.JPG,_LABEL_KOSONG_ +97\DER\IM000002.JPG,_LABEL_KOSONG_ +97\DER\IM000003.JPG,_LABEL_KOSONG_ +97\IZQ\IM000004.JPG,_LABEL_KOSONG_ +97\IZQ\IM000005.JPG,_LABEL_KOSONG_ +97\IZQ\IM000006.JPG,_LABEL_KOSONG_ +97\IZQ\IM000007.JPG,_LABEL_KOSONG_ 
+98\DER\IM000000.JPG,_LABEL_KOSONG_ +98\DER\IM000001.JPG,_LABEL_KOSONG_ +98\DER\IM000002.JPG,_LABEL_KOSONG_ +98\DER\IM000003.JPG,_LABEL_KOSONG_ +98\DER\IM000004.JPG,_LABEL_KOSONG_ +98\DER\IM000005.JPG,_LABEL_KOSONG_ +98\DER\IM000006.JPG,_LABEL_KOSONG_ +98\DER\IM000007.JPG,_LABEL_KOSONG_ +98\DER\IM000008.JPG,_LABEL_KOSONG_ +98\IZQ\IM000009.JPG,_LABEL_KOSONG_ +98\IZQ\IM000010.JPG,_LABEL_KOSONG_ +98\IZQ\IM000011.JPG,_LABEL_KOSONG_ +98\IZQ\IM000012.JPG,_LABEL_KOSONG_ +98\IZQ\IM000013.JPG,_LABEL_KOSONG_ +98\IZQ\IM000014.JPG,_LABEL_KOSONG_ +98\IZQ\IM000015.JPG,_LABEL_KOSONG_ +98\IZQ\IM000016.JPG,_LABEL_KOSONG_ +98\IZQ\IM000017.JPG,_LABEL_KOSONG_ +99\DER\IM000001.JPG,_LABEL_KOSONG_ +99\DER\IM000002.JPG,_LABEL_KOSONG_ +99\DER\IM000003.JPG,_LABEL_KOSONG_ +99\DER\IM000004.JPG,_LABEL_KOSONG_ +99\DER\IM000005.JPG,_LABEL_KOSONG_ +99\DER\IM000006.JPG,_LABEL_KOSONG_ +99\DER\IM000007.JPG,_LABEL_KOSONG_ +99\IZQ\IM000008.JPG,_LABEL_KOSONG_ +99\IZQ\IM000009.JPG,_LABEL_KOSONG_ +99\IZQ\IM000010.JPG,_LABEL_KOSONG_ +99\IZQ\IM000011.JPG,_LABEL_KOSONG_ +99\IZQ\IM000012.JPG,_LABEL_KOSONG_ +99\IZQ\IM000013.JPG,_LABEL_KOSONG_ +99\IZQ\IM000014.JPG,_LABEL_KOSONG_ diff --git a/outputs/csv/labels.csv b/outputs/csv/labels.csv new file mode 100644 index 0000000000000000000000000000000000000000..ab18af7920b069077d007d2aa30ee5b4db6e1f50 --- /dev/null +++ b/outputs/csv/labels.csv @@ -0,0 +1,599 @@ +image,grade +01\DER\IM000000.JPG,5065002 +01\DER\IM000001.JPG,5065002 +01\DER\IM000002.JPG,5065002 +01\DER\IM000003.JPG,5065002 +02\DER\IM000004.JPG,5065002 +02\DER\IM000005.JPG,5065002 +02\DER\IM000006.JPG,5065002 +02\DER\IM000007.JPG,5065002 +02\DER\IM000008.JPG,5065002 +02\DER\IM000009.JPG,5065002 +03\DER\IM000000.JPG,5065002 +03\DER\IM000001.JPG,5065002 +03\DER\IM000002.JPG,5065002 +03\DER\IM000003.JPG,5065002 +03\DER\IM000004.JPG,5065002 +03\DER\IM000005.JPG,5065002 +03\DER\IM000006.JPG,5065002 +04\DER\IM000001.JPG,5065002 +04\DER\IM000002.JPG,5065002 +04\DER\IM000003.JPG,5065002 
+04\DER\IM000004.JPG,5065002 +04\DER\IM000005.JPG,5065002 +05\DER\IM000000.JPG,5065002 +05\DER\IM000001.JPG,5065002 +05\DER\IM000002.JPG,5065002 +05\DER\IM000003.JPG,5065002 +05\DER\IM000004.JPG,5065002 +05\DER\IM000005.JPG,5065002 +05\DER\IM000006.JPG,5065002 +06\Der\IM000000.JPG,5065002 +06\Der\IM000001.JPG,5065002 +06\Der\IM000002.JPG,5065002 +06\Der\IM000003.JPG,5065002 +06\Der\IM000004.JPG,5065002 +06\Der\IM000005.JPG,5065002 +07\DER\IM000000.JPG,5065002 +07\DER\IM000001.JPG,5065002 +07\DER\IM000002.JPG,5065002 +07\DER\IM000003.JPG,5065002 +07\DER\IM000004.JPG,5065002 +07\DER\IM000005.JPG,5065002 +08\IZQ\IM000001.JPG,5065002 +08\IZQ\IM000002.JPG,5065002 +08\IZQ\IM000003.JPG,5065002 +08\IZQ\IM000004.JPG,5065002 +08\IZQ\IM000005.JPG,5065002 +08\IZQ\IM000006.JPG,5065002 +09\IZQ\IM000005.JPG,5065002 +09\IZQ\IM000006.JPG,5065002 +09\IZQ\IM000007.JPG,5065002 +09\IZQ\IM000008.JPG,5065002 +09\IZQ\IM000009.JPG,5065002 +09\IZQ\IM000010.JPG,5065002 +10\DER\IM000001.JPG,5065002 +10\DER\IM000002.JPG,5065002 +10\DER\IM000003.JPG,5065002 +10\DER\IM000004.JPG,5065002 +10\DER\IM000005.JPG,5065002 +10\DER\IM000006.JPG,5065002 +10\DER\IM000007.JPG,5065002 +100\DER\IM000001.JPG,5065002 +100\DER\IM000002.JPG,5065002 +100\DER\IM000004.JPG,5065002 +100\DER\IM000005.JPG,5065002 +100\DER\IM000006.JPG,5065002 +100\DER\IM000007.JPG,5065002 +101\DER\IM000000.JPG,5065002 +101\DER\IM000001.JPG,5065002 +101\DER\IM000002.JPG,5065002 +101\DER\IM000003.JPG,5065002 +101\DER\IM000004.JPG,5065002 +101\DER\IM000005.JPG,5065002 +101\DER\IM000006.JPG,5065002 +102\DER\IM000001.JPG,5065002 +102\DER\IM000002.JPG,5065002 +102\DER\IM000003.JPG,5065002 +102\DER\IM000004.JPG,5065002 +102\DER\IM000005.JPG,5065002 +102\DER\IM000006.JPG,5065002 +103\DER\IM000002.JPG,5065002 +103\DER\IM000003.JPG,5065002 +103\DER\IM000004.JPG,5065002 +103\DER\IM000005.JPG,5065002 +103\DER\IM000006.JPG,5065002 +104\DER\IM000001.JPG,5065002 +104\DER\IM000002.JPG,5065002 +104\DER\IM000003.JPG,5065002 +104\DER\IM000004.JPG,5065002 
+104\DER\IM000005.JPG,5065002 +105\DER\IM000001.JPG,5065002 +105\DER\IM000002.JPG,5065002 +105\DER\IM000003.JPG,5065002 +105\DER\IM000004.JPG,5065002 +105\DER\IM000005.JPG,5065002 +105\DER\IM000006.JPG,5065002 +106\DER\IM000001.JPG,5065002 +106\DER\IM000002.JPG,5065002 +106\DER\IM000003.JPG,5065002 +106\DER\IM000004.JPG,5065002 +106\DER\IM000005.JPG,5065002 +107\DER\IM000007.JPG,5065002 +107\DER\IM000008.JPG,5065002 +107\DER\IM000009.JPG,5065002 +107\DER\IM000010.JPG,5065002 +107\DER\IM000011.JPG,5065002 +108\DER\IM000000.JPG,5065002 +108\DER\IM000001.JPG,5065002 +108\DER\IM000002.JPG,5065002 +108\DER\IM000003.JPG,5065002 +108\DER\IM000004.JPG,5065002 +108\DER\IM000005.JPG,5065002 +108\DER\IM000006.JPG,5065002 +108\DER\IM000007.JPG,5065002 +108\DER\IM000008.JPG,5065002 +109\DER\IM000001.JPG,5065002 +109\DER\IM000002.JPG,5065002 +109\DER\IM000003.JPG,5065002 +109\DER\IM000004.JPG,5065002 +109\DER\IM000005.JPG,5065002 +109\DER\IM000006.JPG,5065002 +109\DER\IM000007.JPG,5065002 +11\DER\IM000000.JPG,5065002 +11\DER\IM000001.JPG,5065002 +11\DER\IM000002.JPG,5065002 +11\DER\IM000003.JPG,5065002 +11\DER\IM000004.JPG,5065002 +11\DER\IM000005.JPG,5065002 +110\DER\IM000000.JPG,5065002 +110\DER\IM000001.JPG,5065002 +110\DER\IM000002.JPG,5065002 +110\DER\IM000003.JPG,5065002 +110\DER\IM000004.JPG,5065002 +110\DER\IM000005.JPG,5065002 +110\DER\IM000006.JPG,5065002 +110\DER\IM000007.JPG,5065002 +111\DER\IM000001.JPG,5065002 +111\DER\IM000002.JPG,5065002 +111\DER\IM000003.JPG,5065002 +111\DER\IM000005.JPG,5065002 +111\DER\IM000006.JPG,5065002 +111\DER\IM000007.JPG,5065002 +112\DER\IM000001.JPG,5065002 +112\DER\IM000003.JPG,5065002 +112\DER\IM000004.JPG,5065002 +112\DER\IM000005.JPG,5065002 +112\DER\IM000006.JPG,5065002 +112\DER\IM000007.JPG,5065002 +112\DER\IM000009.JPG,5065002 +112\DER\IM000010.JPG,5065002 +112\DER\IM000011.JPG,5065002 +113\DER\IM000000.JPG,5065002 +113\DER\IM000001.JPG,5065002 +113\DER\IM000002.JPG,5065002 +113\DER\IM000003.JPG,5065002 
+113\DER\IM000004.JPG,5065002 +113\DER\IM000005.JPG,5065002 +113\DER\IM000006.JPG,5065002 +113\DER\IM000007.JPG,5065002 +113\DER\IM000008.JPG,5065002 +114\DER\IM000000.JPG,5065002 +114\DER\IM000001.JPG,5065002 +114\DER\IM000002.JPG,5065002 +114\DER\IM000003.JPG,5065002 +114\DER\IM000004.JPG,5065002 +114\DER\IM000005.JPG,5065002 +114\DER\IM000006.JPG,5065002 +114\DER\IM000007.JPG,5065002 +115\DER\IM000000.JPG,5065002 +115\DER\IM000001.JPG,5065002 +115\DER\IM000002.JPG,5065002 +115\DER\IM000003.JPG,5065002 +115\DER\IM000004.JPG,5065002 +115\DER\IM000005.JPG,5065002 +115\DER\IM000006.JPG,5065002 +13\DER\IM000001.JPG,5065002 +13\DER\IM000002.JPG,5065002 +13\DER\IM000003.JPG,5065002 +13\DER\IM000004.JPG,5065002 +13\DER\IM000005.JPG,5065002 +13\DER\IM000006.JPG,5065002 +14\DER\IM000000.JPG,5065002 +14\DER\IM000001.JPG,5065002 +14\DER\IM000002.JPG,5065002 +14\DER\IM000003.JPG,5065002 +14\DER\IM000004.JPG,5065002 +14\DER\IM000005.JPG,5065002 +15\DER\IM000001.JPG,5065002 +15\DER\IM000002.JPG,5065002 +15\DER\IM000003.JPG,5065002 +15\DER\IM000004.JPG,5065002 +16\DER\IM000001.JPG,5065002 +16\DER\IM000002.JPG,5065002 +16\DER\IM000003.JPG,5065002 +16\DER\IM000004.JPG,5065002 +16\DER\IM000005.JPG,5065002 +16\DER\IM000006.JPG,5065002 +16\DER\IM000007.JPG,5065002 +18\DER\IM000002.JPG,5065002 +18\DER\IM000003.JPG,5065002 +18\DER\IM000004.JPG,5065002 +18\DER\IM000005.JPG,5065002 +18\DER\IM000006.JPG,5065002 +19\DER\IM000000.JPG,5065002 +19\DER\IM000001.JPG,5065002 +19\DER\IM000002.JPG,5065002 +19\DER\IM000003.JPG,5065002 +19\DER\IM000004.JPG,5065002 +19\DER\IM000005.JPG,5065002 +19\DER\IM000006.JPG,5065002 +19\DER\IM000007.JPG,5065002 +20\DER\IM000000.JPG,5065002 +20\DER\IM000001.JPG,5065002 +20\DER\IM000002.JPG,5065002 +20\DER\IM000003.JPG,5065002 +20\DER\IM000004.JPG,5065002 +20\DER\IM000005.JPG,5065002 +20\DER\IM000006.JPG,5065002 +20\DER\IM000007.JPG,5065002 +21\DER\IM000000.JPG,5065002 +21\DER\IM000001.JPG,5065002 +21\DER\IM000002.JPG,5065002 +21\DER\IM000003.JPG,5065002 
+21\DER\IM000004.JPG,5065002 +21\DER\IM000005.JPG,5065002 +22\DER\IM000000.JPG,5065002 +22\DER\IM000001.JPG,5065002 +22\DER\IM000002.JPG,5065002 +22\DER\IM000003.JPG,5065002 +22\DER\IM000004.JPG,5065002 +22\DER\IM000005.JPG,5065002 +23\DER\IM000001.JPG,5065002 +23\DER\IM000002.JPG,5065002 +23\DER\IM000003.JPG,5065002 +23\DER\IM000004.JPG,5065002 +23\DER\IM000005.JPG,5065002 +24\DER\IM000000.JPG,5065002 +24\DER\IM000001.JPG,5065002 +24\DER\IM000002.JPG,5065002 +24\DER\IM000003.JPG,5065002 +24\DER\IM000004.JPG,5065002 +24\DER\IM000005.JPG,5065002 +24\DER\IM000006.JPG,5065002 +24\DER\IM000007.JPG,5065002 +25\DER\IM000001.JPG,5065002 +25\DER\IM000002.JPG,5065002 +25\DER\IM000003.JPG,5065002 +25\DER\IM000004.JPG,5065002 +25\DER\IM000005.JPG,5065002 +27\DER\IM000002.JPG,5065002 +27\DER\IM000003.JPG,5065002 +27\DER\IM000004.JPG,5065002 +27\DER\IM000005.JPG,5065002 +27\DER\IM000006.JPG,5065002 +28\DER\IM000001.JPG,5065002 +28\DER\IM000002.JPG,5065002 +28\DER\IM000003.JPG,5065002 +28\DER\IM000004.JPG,5065002 +28\DER\IM000005.JPG,5065002 +29\DER\IM000000.JPG,5065002 +29\DER\IM000001.JPG,5065002 +29\DER\IM000002.JPG,5065002 +29\DER\IM000003.JPG,5065002 +29\DER\IM000004.JPG,5065002 +29\DER\IM000005.JPG,5065002 +29\DER\IM000006.JPG,5065002 +30\DER\IM000001.JPG,5065002 +30\DER\IM000002.JPG,5065002 +30\DER\IM000003.JPG,5065002 +30\DER\IM000004.JPG,5065002 +30\DER\IM000005.JPG,5065002 +30\DER\IM000006.JPG,5065002 +30\DER\IM000007.JPG,5065002 +31\DER\IM000001.JPG,5065002 +31\DER\IM000002.JPG,5065002 +31\DER\IM000003.JPG,5065002 +31\DER\IM000004.JPG,5065002 +31\DER\IM000005.JPG,5065002 +32\DER\IM000000.JPG,5065002 +32\DER\IM000001.JPG,5065002 +32\DER\IM000002.JPG,5065002 +32\DER\IM000003.JPG,5065002 +32\DER\IM000004.JPG,5065002 +33\DER\IM000001.JPG,5065002 +33\DER\IM000002.JPG,5065002 +33\DER\IM000003.JPG,5065002 +33\DER\IM000004.JPG,5065002 +33\DER\IM000005.JPG,5065002 +33\DER\IM000006.JPG,5065002 +37\DER\IM000000.JPG,5065002 +37\DER\IM000001.JPG,5065002 
+37\DER\IM000002.JPG,5065002 +37\DER\IM000003.JPG,5065002 +37\DER\IM000004.JPG,5065002 +37\DER\IM000005.JPG,5065002 +38\DER\IM000000.JPG,5065002 +38\DER\IM000001.JPG,5065002 +38\DER\IM000002.JPG,5065002 +38\DER\IM000003.JPG,5065002 +38\DER\IM000004.JPG,5065002 +38\DER\IM000005.JPG,5065002 +38\DER\IM000006.JPG,5065002 +38\DER\IM000007.JPG,5065002 +38\DER\IM000008.JPG,5065002 +38\DER\IM000009.JPG,5065002 +38\DER\IM000010.JPG,5065002 +39\DER\IM000002.JPG,5065002 +39\DER\IM000003.JPG,5065002 +39\DER\IM000004.JPG,5065002 +39\DER\IM000005.JPG,5065002 +39\DER\IM000006.JPG,5065002 +40\DER\IM000000.JPG,5065002 +40\DER\IM000001.JPG,5065002 +40\DER\IM000002.JPG,5065002 +40\DER\IM000003.JPG,5065002 +40\DER\IM000004.JPG,5065002 +40\DER\IM000005.JPG,5065002 +41\DER\IM000001.JPG,5065002 +41\DER\IM000002.JPG,5065002 +41\DER\IM000003.JPG,5065002 +41\DER\IM000004.JPG,5065002 +41\DER\IM000005.JPG,5065002 +41\DER\IM000006.JPG,5065002 +42\DER\IM000001.JPG,5065002 +42\DER\IM000002.JPG,5065002 +42\DER\IM000003.JPG,5065002 +42\DER\IM000004.JPG,5065002 +42\DER\IM000005.JPG,5065002 +42\DER\IM000006.JPG,5065002 +42\DER\IM000007.JPG,5065002 +43\Izq\IM000000.JPG,5065002 +43\Izq\IM000001.JPG,5065002 +43\Izq\IM000002.JPG,5065002 +43\Izq\IM000003.JPG,5065002 +43\Izq\IM000004.JPG,5065002 +43\Izq\IM000005.JPG,5065002 +44\DER\IM000000.JPG,5065002 +44\DER\IM000001.JPG,5065002 +44\DER\IM000002.JPG,5065002 +44\DER\IM000003.JPG,5065002 +44\DER\IM000004.JPG,5065002 +44\DER\IM000005.JPG,5065002 +45\DER\IM000000.JPG,5065002 +45\DER\IM000001.JPG,5065002 +45\DER\IM000002.JPG,5065002 +46\DER\IM000001.JPG,5065002 +46\DER\IM000002.JPG,5065002 +46\DER\IM000004.JPG,5065002 +46\DER\IM000005.JPG,5065002 +47\DER\IM000000.JPG,5065002 +47\DER\IM000001.JPG,5065002 +47\DER\IM000002.JPG,5065002 +47\DER\IM000003.JPG,5065002 +47\DER\IM000004.JPG,5065002 +48\DER\IM000001.JPG,5065002 +48\DER\IM000002.JPG,5065002 +48\DER\IM000003.JPG,5065002 +48\DER\IM000004.JPG,5065002 +48\DER\IM000005.JPG,5065002 
+49\DER\IM000000.JPG,5065002 +49\DER\IM000001.JPG,5065002 +49\DER\IM000002.JPG,5065002 +49\DER\IM000003.JPG,5065002 +49\DER\IM000004.JPG,5065002 +49\DER\IM000005.JPG,5065002 +52\DER\IM000000.JPG,5065002 +52\DER\IM000001.JPG,5065002 +52\DER\IM000002.JPG,5065002 +52\DER\IM000003.JPG,5065002 +52\DER\IM000004.JPG,5065002 +52\DER\IM000005.JPG,5065002 +53\DER\IM000000.JPG,5065002 +53\DER\IM000001.JPG,5065002 +53\DER\IM000002.JPG,5065002 +53\DER\IM000003.JPG,5065002 +53\DER\IM000004.JPG,5065002 +53\DER\IM000005.JPG,5065002 +53\DER\IM000006.JPG,5065002 +53\DER\IM000007.JPG,5065002 +55\DER\IM000000.JPG,5065002 +55\DER\IM000003.JPG,5065002 +55\DER\IM000004.JPG,5065002 +55\DER\IM000005.JPG,5065002 +55\DER\IM000006.JPG,5065002 +55\DER\IM000007.JPG,5065002 +57\DER\IM000000.JPG,5065002 +57\DER\IM000001.JPG,5065002 +57\DER\IM000002.JPG,5065002 +59\DER\IM000001.JPG,5065002 +59\DER\IM000002.JPG,5065002 +59\DER\IM000003.JPG,5065002 +59\DER\IM000004.JPG,5065002 +59\DER\IM000005.JPG,5065002 +59\DER\IM000006.JPG,5065002 +59\DER\IM000007.JPG,5065002 +60\DER\IM000001.JPG,5065002 +60\DER\IM000002.JPG,5065002 +60\DER\IM000003.JPG,5065002 +60\DER\IM000004.JPG,5065002 +63\DER\IM000000.JPG,5065002 +63\DER\IM000001.JPG,5065002 +63\DER\IM000002.JPG,5065002 +63\DER\IM000003.JPG,5065002 +63\DER\IM000004.JPG,5065002 +63\DER\IM000005.JPG,5065002 +64\DER\IM000000.JPG,5065002 +64\DER\IM000001.JPG,5065002 +64\DER\IM000002.JPG,5065002 +64\DER\IM000003.JPG,5065002 +65\DER\IM000000.JPG,5065002 +65\DER\IM000001.JPG,5065002 +65\DER\IM000002.JPG,5065002 +65\DER\IM000003.JPG,5065002 +65\DER\IM000004.JPG,5065002 +65\DER\IM000005.JPG,5065002 +66\DER\IM000001.JPG,5065002 +66\DER\IM000002.JPG,5065002 +66\DER\IM000003.JPG,5065002 +66\DER\IM000004.JPG,5065002 +66\DER\IM000005.JPG,5065002 +66\DER\IM000006.JPG,5065002 +67\DER\IM000000.JPG,5065002 +67\DER\IM000001.JPG,5065002 +67\DER\IM000002.JPG,5065002 +67\DER\IM000003.JPG,5065002 +67\DER\IM000004.JPG,5065002 +68\DER\IM000000.JPG,5065002 
+68\DER\IM000001.JPG,5065002 +68\DER\IM000002.JPG,5065002 +68\DER\IM000003.JPG,5065002 +68\DER\IM000004.JPG,5065002 +68\DER\IM000005.JPG,5065002 +70\DER\IM000001.JPG,5065002 +70\DER\IM000002.JPG,5065002 +70\DER\IM000003.JPG,5065002 +70\DER\IM000004.JPG,5065002 +70\DER\IM000005.JPG,5065002 +70\DER\IM000006.JPG,5065002 +71\DER\IM000001.JPG,5065002 +71\DER\IM000002.JPG,5065002 +71\DER\IM000003.JPG,5065002 +71\DER\IM000004.JPG,5065002 +71\DER\IM000005.JPG,5065002 +71\DER\IM000006.JPG,5065002 +71\DER\IM000007.JPG,5065002 +72\DER\IM000000.JPG,5065002 +72\DER\IM000001.JPG,5065002 +72\DER\IM000002.JPG,5065002 +72\DER\IM000003.JPG,5065002 +72\DER\IM000004.JPG,5065002 +72\DER\IM000005.JPG,5065002 +72\DER\IM000006.JPG,5065002 +73\DER\IM000000.JPG,5065002 +73\DER\IM000001.JPG,5065002 +73\DER\IM000002.JPG,5065002 +73\DER\IM000003.JPG,5065002 +73\DER\IM000004.JPG,5065002 +73\DER\IM000005.JPG,5065002 +73\DER\IM000006.JPG,5065002 +74\DER\IM000001.JPG,5065002 +74\DER\IM000002.JPG,5065002 +74\DER\IM000003.JPG,5065002 +74\DER\IM000004.JPG,5065002 +74\DER\IM000005.JPG,5065002 +74\DER\IM000006.JPG,5065002 +75\DER\IM000000.JPG,5065002 +75\DER\IM000001.JPG,5065002 +75\DER\IM000003.JPG,5065002 +75\DER\IM000004.JPG,5065002 +75\DER\IM000005.JPG,5065002 +75\DER\IM000006.JPG,5065002 +76\DER\IM000000.JPG,5065002 +76\DER\IM000001.JPG,5065002 +76\DER\IM000002.JPG,5065002 +76\DER\IM000003.JPG,5065002 +77\DER\IM000000.JPG,5065002 +77\DER\IM000001.JPG,5065002 +77\DER\IM000002.JPG,5065002 +77\DER\IM000003.JPG,5065002 +77\DER\IM000004.JPG,5065002 +77\DER\IM000005.JPG,5065002 +77\DER\IM000006.JPG,5065002 +78\DER\IM000000.JPG,5065002 +78\DER\IM000001.JPG,5065002 +78\DER\IM000002.JPG,5065002 +78\DER\IM000003.JPG,5065002 +78\DER\IM000004.JPG,5065002 +78\DER\IM000005.JPG,5065002 +79\DER\IM000001.JPG,5065002 +79\DER\IM000002.JPG,5065002 +79\DER\IM000003.JPG,5065002 +79\DER\IM000004.JPG,5065002 +79\DER\IM000005.JPG,5065002 +79\DER\IM000006.JPG,5065002 +79\DER\IM000007.JPG,5065002 
+80\DER\IM000001.JPG,5065002 +80\DER\IM000002.JPG,5065002 +80\DER\IM000003.JPG,5065002 +80\DER\IM000004.JPG,5065002 +80\DER\IM000005.JPG,5065002 +80\DER\IM000006.JPG,5065002 +81\DER\IM000000.JPG,5065002 +81\DER\IM000001.JPG,5065002 +81\DER\IM000002.JPG,5065002 +81\DER\IM000003.JPG,5065002 +81\DER\IM000004.JPG,5065002 +81\DER\IM000005.JPG,5065002 +81\DER\IM000006.JPG,5065002 +82\DER\IM000000.JPG,5065002 +82\DER\IM000001.JPG,5065002 +82\DER\IM000002.JPG,5065002 +82\DER\IM000003.JPG,5065002 +82\DER\IM000004.JPG,5065002 +82\DER\IM000005.JPG,5065002 +83\DER\IM000000.JPG,5065002 +83\DER\IM000001.JPG,5065002 +83\DER\IM000002.JPG,5065002 +83\DER\IM000003.JPG,5065002 +83\DER\IM000004.JPG,5065002 +83\DER\IM000005.JPG,5065002 +83\DER\IM000006.JPG,5065002 +83\DER\IM000007.JPG,5065002 +84\DER\IM000000.JPG,5065002 +84\DER\IM000001.JPG,5065002 +84\DER\IM000002.JPG,5065002 +84\DER\IM000003.JPG,5065002 +84\DER\IM000004.JPG,5065002 +84\DER\IM000005.JPG,5065002 +84\DER\IM000006.JPG,5065002 +84\DER\IM000007.JPG,5065002 +84\DER\IM000008.JPG,5065002 +84\DER\IM000009.JPG,5065002 +85\DER\IM000001.JPG,5065002 +85\DER\IM000002.JPG,5065002 +85\DER\IM000003.JPG,5065002 +85\DER\IM000004.JPG,5065002 +85\DER\IM000005.JPG,5065002 +85\DER\IM000006.JPG,5065002 +86\DER\IM000000.JPG,5065002 +86\DER\IM000001.JPG,5065002 +86\DER\IM000002.JPG,5065002 +86\DER\IM000003.JPG,5065002 +86\DER\IM000004.JPG,5065002 +87\DER\IM000000.JPG,5065002 +87\DER\IM000005.JPG,5065002 +87\DER\IM000006.JPG,5065002 +87\DER\IM000007.JPG,5065002 +88\DER\IM000000.JPG,5065002 +88\DER\IM000001.JPG,5065002 +88\DER\IM000002.JPG,5065002 +88\DER\IM000003.JPG,5065002 +88\DER\IM000004.JPG,5065002 +88\DER\IM000005.JPG,5065002 +88\DER\IM000006.JPG,5065002 +89\DER\IM000001.JPG,5065002 +89\DER\IM000002.JPG,5065002 +89\DER\IM000003.JPG,5065002 +89\DER\IM000004.JPG,5065002 +89\DER\IM000005.JPG,5065002 +89\DER\IM000006.JPG,5065002 +90\DER\IM000000.JPG,5065002 +90\DER\IM000001.JPG,5065002 +90\DER\IM000002.JPG,5065002 
+90\DER\IM000003.JPG,5065002 +90\DER\IM000004.JPG,5065002 +90\DER\IM000005.JPG,5065002 +90\DER\IM000006.JPG,5065002 +91\DER\IM000002.JPG,5065002 +91\DER\IM000003.JPG,5065002 +91\DER\IM000005.JPG,5065002 +91\DER\IM000006.JPG,5065002 +92\DER\IM000001.JPG,5065002 +92\DER\IM000002.JPG,5065002 +92\DER\IM000003.JPG,5065002 +92\DER\IM000004.JPG,5065002 +92\DER\IM000005.JPG,5065002 +93\DER\IM000001.JPG,5065002 +93\DER\IM000002.JPG,5065002 +93\DER\IM000003.JPG,5065002 +93\DER\IM000004.JPG,5065002 +93\DER\IM000005.JPG,5065002 +93\DER\IM000006.JPG,5065002 +93\DER\IM000007.JPG,5065002 +98\DER\IM000000.JPG,5065002 +98\DER\IM000001.JPG,5065002 +98\DER\IM000002.JPG,5065002 +98\DER\IM000003.JPG,5065002 +98\DER\IM000004.JPG,5065002 +98\DER\IM000005.JPG,5065002 +98\DER\IM000006.JPG,5065002 +98\DER\IM000007.JPG,5065002 +98\DER\IM000008.JPG,5065002 +99\DER\IM000001.JPG,5065002 +99\DER\IM000002.JPG,5065002 +99\DER\IM000003.JPG,5065002 +99\DER\IM000004.JPG,5065002 +99\DER\IM000005.JPG,5065002 +99\DER\IM000006.JPG,5065002 +99\DER\IM000007.JPG,5065002 diff --git a/outputs/models/accuracy.png b/outputs/models/accuracy.png new file mode 100644 index 0000000000000000000000000000000000000000..17a76529fe0c9856918146bf2f604a697f92936b Binary files /dev/null and b/outputs/models/accuracy.png differ diff --git a/outputs/models/confusion_matrix.png b/outputs/models/confusion_matrix.png new file mode 100644 index 0000000000000000000000000000000000000000..a9c2f48b8aad72c2b5e6833a4a60a2cf0382fd7e Binary files /dev/null and b/outputs/models/confusion_matrix.png differ diff --git a/outputs/models/loss.png b/outputs/models/loss.png new file mode 100644 index 0000000000000000000000000000000000000000..90e38ebea5aff3a1f2d2c6c4e8ea43d43428884f Binary files /dev/null and b/outputs/models/loss.png differ diff --git a/outputs/new_model/accuracy.png b/outputs/new_model/accuracy.png new file mode 100644 index 0000000000000000000000000000000000000000..4d364e9a0db0f0f2844c9eb3cc07743996c09a21 Binary files 
/dev/null and b/outputs/new_model/accuracy.png differ diff --git a/outputs/new_model/confusion_matrix.png b/outputs/new_model/confusion_matrix.png new file mode 100644 index 0000000000000000000000000000000000000000..bcca190e6b4598a56db1e16f14b0002408ae59c6 Binary files /dev/null and b/outputs/new_model/confusion_matrix.png differ diff --git a/outputs/new_model/loss.png b/outputs/new_model/loss.png new file mode 100644 index 0000000000000000000000000000000000000000..9875782d42faad181ac786ba4ce2fb3284f289ef Binary files /dev/null and b/outputs/new_model/loss.png differ diff --git a/outputs/new_model2/accuracy.png b/outputs/new_model2/accuracy.png new file mode 100644 index 0000000000000000000000000000000000000000..c23ac41c82fb831f36833894628fe00fd9848bd6 Binary files /dev/null and b/outputs/new_model2/accuracy.png differ diff --git a/outputs/new_model2/confusion_matrix.png b/outputs/new_model2/confusion_matrix.png new file mode 100644 index 0000000000000000000000000000000000000000..289b36cefd5653b2c416e3a22a61e0bed79c119c Binary files /dev/null and b/outputs/new_model2/confusion_matrix.png differ diff --git a/outputs/new_model2/loss.png b/outputs/new_model2/loss.png new file mode 100644 index 0000000000000000000000000000000000000000..1506ea0b7abba47901df16bf8cfcde5a35e0d689 Binary files /dev/null and b/outputs/new_model2/loss.png differ diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..62e257692a3dab62cbbe35054594a5de6537a16c --- /dev/null +++ b/requirements.txt @@ -0,0 +1,10 @@ +torch +torchvision +torchaudio +timm +scikit-learn +opencv-python +matplotlib +seaborn +albumentations +wandb diff --git a/src/__init__.py b/src/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..ef114c73a16067a0152e5206e0e75f2a08c4ef12 --- /dev/null +++ b/src/__init__.py @@ -0,0 +1,7 @@ +# src/__init__.py + +# Biar bisa langsung import fungsi/kelas dari dataset dan model +from .dataset import 
CustomDataset +from .model import get_vit_model + +__all__ = ["CustomDataset", "get_vit_model"] diff --git a/src/__pycache__/dataset.cpython-310.pyc b/src/__pycache__/dataset.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..119a3f3da507af8db4bf3f40c88f5006305876bc Binary files /dev/null and b/src/__pycache__/dataset.cpython-310.pyc differ diff --git a/src/__pycache__/model.cpython-310.pyc b/src/__pycache__/model.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..0b7a83f30a1665dbb79cb3def8bfb4f0cbfcaa9a Binary files /dev/null and b/src/__pycache__/model.cpython-310.pyc differ diff --git a/src/__pycache__/utils.cpython-310.pyc b/src/__pycache__/utils.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..3190e7bc69ea08c2f5017ae6564b17a4ba985e93 Binary files /dev/null and b/src/__pycache__/utils.cpython-310.pyc differ diff --git a/src/convert_to_state_dict.py b/src/convert_to_state_dict.py new file mode 100644 index 0000000000000000000000000000000000000000..7329ee07caa4d7c27cd93fe63c56b935d11c4b99 --- /dev/null +++ b/src/convert_to_state_dict.py @@ -0,0 +1,34 @@ +import torch +from torch.serialization import add_safe_globals +from torch.nn.modules.loss import CrossEntropyLoss + +# Tambahkan whitelist agar bisa load aman +add_safe_globals([CrossEntropyLoss]) + +old_model_path = r"C:\Users\user\Documents\Project\Cataract-ViT\outputs\models\best_swin_model_final.pth" +new_model_path = r"C:\Users\user\Documents\Project\Cataract-ViT\outputs\models\best_swin_weights_only.pth" + +# Load dengan weights_only=False karena ini file trusted +checkpoint = torch.load(old_model_path, map_location="cpu", weights_only=False) + +# Deteksi isi file +if isinstance(checkpoint, dict): + # Coba ambil beberapa kemungkinan key umum + if "state_dict" in checkpoint: + state_dict = checkpoint["state_dict"] + elif "model_state_dict" in checkpoint: + state_dict = checkpoint["model_state_dict"] + 
import os
from torch.utils.data import DataLoader, WeightedRandomSampler
from torchvision import datasets, transforms
import numpy as np
from collections import Counter


def get_dataloaders(data_dir, batch_size=32, image_size=224, num_workers=4):
    """Build class-balanced training and plain validation loaders.

    Expects ``data_dir`` to contain ``train`` and ``valid`` sub-folders laid
    out for ``torchvision.datasets.ImageFolder`` (one folder per class).

    Args:
        data_dir: Root folder holding the ``train`` and ``valid`` splits.
        batch_size: Batch size used by both loaders.
        image_size: Side length of the square images fed to the model.
        num_workers: Worker processes used by both loaders.

    Returns:
        ``(train_loader, valid_loader, class_names, train_dataset)``.
    """
    # The same normalisation is applied to both splits (the values match the
    # commonly used ImageNet channel statistics).
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])

    # Training pipeline: random geometric and colour augmentation.
    augmented = transforms.Compose([
        transforms.RandomHorizontalFlip(),
        transforms.RandomVerticalFlip(),
        transforms.RandomRotation(degrees=45),
        transforms.ColorJitter(brightness=0.3, contrast=0.3, saturation=0.3, hue=0.1),
        transforms.RandomResizedCrop(image_size, scale=(0.8, 1.0)),
        transforms.ToTensor(),
        normalize,
    ])

    # Validation pipeline: deterministic resize only.
    plain = transforms.Compose([
        transforms.Resize((image_size, image_size)),
        transforms.ToTensor(),
        normalize,
    ])

    train_dataset = datasets.ImageFolder(os.path.join(data_dir, "train"), transform=augmented)
    valid_dataset = datasets.ImageFolder(os.path.join(data_dir, "valid"), transform=plain)

    # Weight every sample by the inverse frequency of its class so that the
    # sampler draws all classes with equal probability.
    labels = [label for _path, label in train_dataset.samples]
    frequency = Counter(labels)
    per_sample_weight = [1.0 / frequency[label] for label in labels]
    sampler = WeightedRandomSampler(
        weights=per_sample_weight,
        num_samples=len(per_sample_weight),
        replacement=True,
    )

    shared_options = dict(batch_size=batch_size, num_workers=num_workers, pin_memory=True)
    train_loader = DataLoader(train_dataset, sampler=sampler, **shared_options)
    valid_loader = DataLoader(valid_dataset, shuffle=False, **shared_options)

    return train_loader, valid_loader, train_dataset.classes, train_dataset
# split_dataset.py
"""Split a class-per-folder image dataset into ``train`` and ``valid`` folders."""

import math
import os
import random
import shutil

# --- CONFIGURATION ---
# 1. Folder that holds the extracted source dataset (one sub-folder per class).
SOURCE_DIR = 'dataset'
# 2. Destination folder in which 'train' and 'valid' are created.
TARGET_DIR = 'data'
# 3. Split ratio (0.8 means 80% of every class goes to training).
TRAIN_RATIO = 0.8

# File extensions (compared case-insensitively) that count as images.
IMAGE_EXTENSIONS = ('.png', '.jpg', '.jpeg')


def split_dataset(source_dir=SOURCE_DIR, target_dir=TARGET_DIR,
                  train_ratio=TRAIN_RATIO, seed=None):
    """Copy every class folder of ``source_dir`` into train/valid subsets.

    The source is validated *before* ``target_dir`` is wiped, so a missing or
    empty source can no longer destroy a previously generated split.

    Args:
        source_dir: Folder containing one sub-folder per class.
        target_dir: Folder to (re)create with ``train/<class>`` and
            ``valid/<class>`` sub-folders. Any existing content is removed.
        train_ratio: Fraction of each class copied to ``train`` (0..1).
        seed: Optional seed for a reproducible shuffle; ``None`` keeps the
            split random on every run.

    Returns:
        Dict mapping class name to ``(n_train, n_valid)``.

    Raises:
        FileNotFoundError: ``source_dir`` is missing or has no class folders.
        ValueError: ``train_ratio`` is outside [0, 1] or source and target
            are the same folder.
    """
    if not 0.0 <= train_ratio <= 1.0:
        raise ValueError(f"train_ratio must be within [0, 1], got {train_ratio}")
    if os.path.abspath(source_dir) == os.path.abspath(target_dir):
        raise ValueError("source_dir and target_dir must be different folders")
    if not os.path.isdir(source_dir):
        raise FileNotFoundError(source_dir)

    # Sorted so that a given seed always yields the same split, independent of
    # the (arbitrary) order in which the OS lists directory entries.
    class_folders = sorted(
        entry for entry in os.listdir(source_dir)
        if os.path.isdir(os.path.join(source_dir, entry))
    )
    if not class_folders:
        raise FileNotFoundError(source_dir)

    print(f"Ditemukan {len(class_folders)} kelas: {class_folders}")

    # Only now is it safe to start from a clean target folder.
    if os.path.exists(target_dir):
        shutil.rmtree(target_dir)

    train_path = os.path.join(target_dir, 'train')
    valid_path = os.path.join(target_dir, 'valid')
    os.makedirs(train_path, exist_ok=True)
    os.makedirs(valid_path, exist_ok=True)

    rng = random.Random(seed)
    counts = {}

    for cls in class_folders:
        source_class_path = os.path.join(source_dir, cls)
        train_class_path = os.path.join(train_path, cls)
        valid_class_path = os.path.join(valid_path, cls)
        os.makedirs(train_class_path, exist_ok=True)
        os.makedirs(valid_class_path, exist_ok=True)

        images = sorted(
            name for name in os.listdir(source_class_path)
            if name.lower().endswith(IMAGE_EXTENSIONS)
        )
        rng.shuffle(images)

        split_point = math.floor(len(images) * train_ratio)
        train_images = images[:split_point]
        valid_images = images[split_point:]

        print(f" Kelas '{cls}': {len(train_images)} train, {len(valid_images)} valid")

        for img in train_images:
            shutil.copy(os.path.join(source_class_path, img), os.path.join(train_class_path, img))
        for img in valid_images:
            shutil.copy(os.path.join(source_class_path, img), os.path.join(valid_class_path, img))

        counts[cls] = (len(train_images), len(valid_images))

    return counts


def main():
    """Run the split with the module-level configuration."""
    print(f"Memulai proses split data dari folder '{SOURCE_DIR}'...")
    try:
        split_dataset(SOURCE_DIR, TARGET_DIR, TRAIN_RATIO)
    except FileNotFoundError:
        print(f"!!! ERROR: Folder '{SOURCE_DIR}' tidak ditemukan atau kosong.")
        print("Pastikan Anda sudah mengekstrak dataset Kaggle ke dalam folder tersebut.")
        # Non-zero status so shells and CI can detect the failure.
        raise SystemExit(1) from None

    print("\n--- Proses split data selesai! ---")
    print(f"Folder '{TARGET_DIR}' dengan struktur 'train' dan 'valid' telah berhasil dibuat.")


if __name__ == "__main__":
    main()
# src/train.py (TTA validation + label-smoothed cross-entropy + early stopping)

import torch
import torch.nn as nn
import torch.optim as optim
from tqdm import tqdm
import numpy as np
from sklearn.metrics import classification_report
from torch.utils.data import DataLoader, WeightedRandomSampler
from torch.optim.lr_scheduler import ReduceLROnPlateau
import copy

# Imports from other files of this project
from model import create_model
from dataset import get_dataloaders
from utils import save_model, save_plots, save_confusion_matrix, FocalLoss

# --- 1. CONFIGURATION & HYPERPARAMETERS ---
DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'
DATA_DIR = 'data'
OUTPUT_DIR = 'outputs/new_model2'
IMAGE_SIZE = 224
BATCH_SIZE = 16
NUM_WORKERS = 4
EPOCHS = 50
HEAD_EPOCHS = 5  # warm-up epochs that train only the classifier head
LEARNING_RATE_HEAD = 1e-3
LEARNING_RATE_FINETUNE = 2e-5
WEIGHT_DECAY = 0.05
MODEL_NAME = 'best_model_final_TTA_Focal.pth'
NUM_CLASSES = 4
PATIENCE = 7  # epochs without improvement before early stopping


# --- 2. TRAINING & VALIDATION ---
def train_one_epoch(model, dataloader, optimizer, criterion, device):
    """Run one optimisation pass over ``dataloader``.

    Args:
        model: Network to train; switched to train mode.
        dataloader: Iterable yielding ``(images, labels)`` batches.
        optimizer: Optimiser stepping the trainable parameters.
        criterion: Loss taking ``(logits, labels)``.
        device: Device the batches are moved to.

    Returns:
        ``(mean_loss, accuracy)`` over all samples seen, as Python floats.

    Raises:
        ValueError: The dataloader produced no samples.
    """
    model.train()
    running_loss, correct, total = 0.0, 0, 0

    for images, labels in tqdm(dataloader, total=len(dataloader), desc="Training"):
        images, labels = images.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        batch_size = labels.size(0)
        running_loss += loss.item() * batch_size
        # Accumulate plain ints so an empty loader cannot leave `correct` as
        # an int that is later treated like a tensor.
        correct += (outputs.argmax(dim=1) == labels).sum().item()
        total += batch_size

    if total == 0:
        raise ValueError("train_one_epoch: dataloader yielded no samples")
    return running_loss / total, correct / total


def validate_one_epoch_tta(model, dataloader, criterion, device):
    """Validate with test-time augmentation (TTA).

    Each batch is evaluated on four views (original, horizontal flip,
    vertical flip, 90-degree rotation) and the class probabilities are
    averaged. The rotation keeps the tensor shape only for square inputs.

    Args:
        model: Network to evaluate; switched to eval mode.
        dataloader: Iterable yielding ``(images, labels)`` batches.
        criterion: Loss taking ``(logits, labels)``.
        device: Device the batches are moved to.

    Returns:
        ``(mean_loss, accuracy, all_labels, all_preds)``; the last two are
        flat Python lists of class indices.

    Raises:
        ValueError: The dataloader produced no samples.
    """
    model.eval()
    running_loss, correct, total = 0.0, 0, 0
    all_preds, all_labels = [], []

    with torch.no_grad():
        for images, labels in tqdm(dataloader, total=len(dataloader), desc="Validating with TTA"):
            images, labels = images.to(device), labels.to(device)

            views = (
                images,                                   # original
                torch.flip(images, dims=[3]),             # horizontal flip
                torch.flip(images, dims=[2]),             # vertical flip
                torch.rot90(images, k=1, dims=[2, 3]),    # rotate 90 degrees
            )
            probs = torch.stack(
                [torch.softmax(model(view), dim=1) for view in views]
            ).mean(dim=0)

            # The criterion expects logits. Feeding probabilities would apply
            # softmax twice and distort the loss. log(p) is a logit vector
            # whose softmax is exactly p, so this scores the averaged
            # TTA prediction itself.
            loss = criterion(torch.log(probs.clamp_min(1e-12)), labels)

            batch_size = labels.size(0)
            running_loss += loss.item() * batch_size
            preds = probs.argmax(dim=1)
            correct += (preds == labels).sum().item()
            total += batch_size

            all_preds.extend(preds.cpu().tolist())
            all_labels.extend(labels.cpu().tolist())

    if total == 0:
        raise ValueError("validate_one_epoch_tta: dataloader yielded no samples")

    epoch_loss = running_loss / total
    epoch_acc = correct / total

    print("\n--- Laporan Klasifikasi Validasi (dengan TTA) ---")
    # `labels=` is given explicitly: without it classification_report raises
    # when a class is absent from both the labels and the predictions.
    print(classification_report(
        all_labels,
        all_preds,
        labels=list(range(NUM_CLASSES)),
        target_names=[str(i) for i in range(NUM_CLASSES)],
        zero_division=0,
    ))

    return epoch_loss, epoch_acc, all_labels, all_preds


# --- 3. MAIN SCRIPT ---
if __name__ == '__main__':
    _, valid_loader, classes, train_dataset = get_dataloaders(DATA_DIR, BATCH_SIZE, IMAGE_SIZE, NUM_WORKERS)

    print("\n--- Menyiapkan Balanced Sampler untuk Training ---")
    class_counts = np.bincount(train_dataset.targets)
    class_weights = 1. / torch.tensor(class_counts, dtype=torch.float)
    sample_weights = class_weights[train_dataset.targets]
    sampler = WeightedRandomSampler(weights=sample_weights, num_samples=len(sample_weights), replacement=True)
    train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, sampler=sampler, num_workers=NUM_WORKERS)

    model = create_model(num_classes=NUM_CLASSES, image_size=IMAGE_SIZE).to(DEVICE)

    criterion = nn.CrossEntropyLoss(label_smoothing=0.1).to(DEVICE)
    print("Menggunakan CrossEntropyLoss dengan Label Smoothing (0.1).")

    # --- STAGE 1: freeze the backbone, train only the head ---
    # NOTE(review): assumes the model exposes its classifier as `model.head`.
    print("\n--- TAHAP 1: Melatih Classifier Head ---")
    for param in model.parameters():
        param.requires_grad = False
    for param in model.head.parameters():
        param.requires_grad = True
    optimizer_head = optim.AdamW(model.head.parameters(), lr=LEARNING_RATE_HEAD, weight_decay=WEIGHT_DECAY)
    for epoch in range(HEAD_EPOCHS):
        print(f"Epoch Head {epoch+1}/{HEAD_EPOCHS}")
        train_one_epoch(model, train_loader, optimizer_head, criterion, DEVICE)
        validate_one_epoch_tta(model, valid_loader, criterion, DEVICE)

    # --- STAGE 2: unfreeze and fine-tune the whole model ---
    print("\n--- TAHAP 2: Fine-tuning Seluruh Model ---")
    for param in model.parameters():
        param.requires_grad = True
    optimizer_finetune = optim.AdamW(model.parameters(), lr=LEARNING_RATE_FINETUNE, weight_decay=WEIGHT_DECAY)
    scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer_finetune, T_max=EPOCHS, eta_min=1e-7)
    print("Menggunakan scheduler CosineAnnealingLR.")

    history = {'train_loss': [], 'train_acc': [], 'valid_loss': [], 'valid_acc': []}
    best_valid_acc = 0.0
    best_labels, best_preds = None, None
    best_model_wts = copy.deepcopy(model.state_dict())
    patience_counter = 0

    for epoch in range(EPOCHS):
        print(f"Epoch {epoch+1}/{EPOCHS}")

        train_loss, train_acc = train_one_epoch(model, train_loader, optimizer_finetune, criterion, DEVICE)
        valid_loss, valid_acc, valid_labels, valid_preds = validate_one_epoch_tta(model, valid_loader, criterion, DEVICE)

        # CosineAnnealingLR follows a fixed schedule. Its optional step()
        # argument is an epoch index, not a metric, so passing the validation
        # loss (as for ReduceLROnPlateau) would corrupt the schedule.
        scheduler.step()

        print(f" Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}")
        print(f" Valid Loss: {valid_loss:.4f}, Valid Acc: {valid_acc:.4f}")

        history['train_loss'].append(train_loss)
        history['train_acc'].append(train_acc)
        history['valid_loss'].append(valid_loss)
        history['valid_acc'].append(valid_acc)

        # Early-stopping check, driven by validation accuracy.
        if valid_acc > best_valid_acc:
            print(f"Validasi akurasi meningkat dari {best_valid_acc:.4f} ke {valid_acc:.4f}. Menyimpan model...")
            save_model(epoch, model, optimizer_finetune, criterion, f"{OUTPUT_DIR}/{MODEL_NAME}")
            best_valid_acc = valid_acc
            best_labels = valid_labels
            best_preds = valid_preds
            best_model_wts = copy.deepcopy(model.state_dict())
            patience_counter = 0
        else:
            patience_counter += 1
            if patience_counter >= PATIENCE:
                print(f"Early stopping di epoch {epoch+1} karena tidak ada peningkatan validasi selama {PATIENCE} epoch.")
                break

    # Restore the best weights before reporting.
    model.load_state_dict(best_model_wts)
    save_plots(history['train_acc'], history['valid_acc'], history['train_loss'], history['valid_loss'], OUTPUT_DIR)

    if best_labels is not None and best_preds is not None:
        save_confusion_matrix(best_labels, best_preds, classes, f"{OUTPUT_DIR}/confusion_matrix.png")

    print("\n--- Selesai ---")
    print(f"Model terbaik disimpan di {OUTPUT_DIR}/{MODEL_NAME}")
def _ensure_dir(directory):
    """Create *directory* (including parents) unless it is the empty string.

    ``os.path.dirname`` returns ``""`` for a bare filename, and
    ``os.makedirs("")`` raises ``FileNotFoundError``; the guard makes a path
    relative to the current working directory a valid target.
    """
    if directory:
        os.makedirs(directory, exist_ok=True)


def _save_curves(curves, ylabel, out_file):
    """Plot several per-epoch curves on one figure and write it to *out_file*.

    Args:
        curves: Iterable of ``(values, color, label)`` triples.
        ylabel: Label for the y axis.
        out_file: Destination image path passed to ``plt.savefig``.
    """
    fig = plt.figure(figsize=(10, 7))
    try:
        for values, color, label in curves:
            plt.plot(values, color=color, linestyle='-', label=label)
        plt.xlabel('Epochs')
        plt.ylabel(ylabel)
        plt.legend()
        plt.savefig(out_file)
    finally:
        # Release the figure; pyplot otherwise keeps every figure alive.
        plt.close(fig)


def save_model(epochs, model, optimizer, criterion, model_path):
    """Save a training checkpoint to *model_path*.

    The checkpoint is a dict with the keys ``'epoch'``, ``'model_state_dict'``,
    ``'optimizer_state_dict'`` and ``'loss'``.

    Args:
        epochs: Value stored under ``'epoch'``.
        model: Object whose ``state_dict()`` is stored.
        optimizer: Object whose ``state_dict()`` is stored.
        criterion: Stored as-is under ``'loss'`` (the object itself, not a
            scalar loss value).
        model_path: Destination file; its parent directory is created if the
            path has one.
    """
    print(f"Menyimpan model ke {model_path}")
    _ensure_dir(os.path.dirname(model_path))
    checkpoint = {
        'epoch': epochs,
        'model_state_dict': model.state_dict(),
        'optimizer_state_dict': optimizer.state_dict(),
        'loss': criterion,
    }
    torch.save(checkpoint, model_path)


def save_plots(train_acc, valid_acc, train_loss, valid_loss, plot_path):
    """Save the accuracy and loss curves as PNG files inside *plot_path*.

    Writes ``accuracy.png`` and ``loss.png`` into the directory *plot_path*,
    creating that directory first if it does not exist.

    Args:
        train_acc: Per-epoch training accuracy values.
        valid_acc: Per-epoch validation accuracy values.
        train_loss: Per-epoch training loss values.
        valid_loss: Per-epoch validation loss values.
        plot_path: Output DIRECTORY (not a file path).
    """
    print(f"Menyimpan plot ke {plot_path}")
    # plot_path is itself the directory the images go into, so create it
    # directly rather than its parent.
    _ensure_dir(plot_path)

    _save_curves(
        [
            (train_acc, 'green', 'train accuracy'),
            (valid_acc, 'blue', 'validation accuracy'),
        ],
        'Accuracy',
        f"{plot_path}/accuracy.png",
    )
    _save_curves(
        [
            (train_loss, 'orange', 'train loss'),
            (valid_loss, 'red', 'validation loss'),
        ],
        'Loss',
        f"{plot_path}/loss.png",
    )


def save_confusion_matrix(y_true, y_pred, class_names, save_path):
    """Render the confusion matrix of the predictions and save it as an image.

    Args:
        y_true: Ground-truth labels, forwarded to ``confusion_matrix``.
        y_pred: Predicted labels, forwarded to ``confusion_matrix``.
        class_names: Tick labels used for both heatmap axes.
        save_path: Destination image file; its parent directory is created
            if the path has one.
    """
    cm = confusion_matrix(y_true, y_pred)
    _ensure_dir(os.path.dirname(save_path))
    fig = plt.figure(figsize=(10, 8))
    try:
        sns.heatmap(
            cm,
            annot=True,
            fmt='d',
            cmap='Blues',
            xticklabels=class_names,
            yticklabels=class_names,
        )
        plt.xlabel('Predicted Label')
        plt.ylabel('True Label')
        plt.title('Confusion Matrix of Best Validation Model')
        plt.savefig(save_path)
    finally:
        # Release the figure; pyplot otherwise keeps every figure alive.
        plt.close(fig)
    print(f"Confusion matrix disimpan di {save_path}")


class FocalLoss(nn.Module):
    """Focal loss built on top of ``F.cross_entropy``.

    For each sample the loss is ``alpha * (1 - pt) ** gamma * ce`` where
    ``ce`` is the unreduced cross-entropy and ``pt = exp(-ce)``.

    Args:
        alpha: Scalar multiplier applied to every sample's loss.
        gamma: Exponent of the ``(1 - pt)`` modulating factor; ``0`` reduces
            the loss to ``alpha`` times plain cross-entropy.
        reduction: ``'mean'`` or ``'sum'``; any other value returns the
            unreduced per-sample losses.
    """

    def __init__(self, alpha=1, gamma=2, reduction='mean'):
        super().__init__()
        self.alpha = alpha
        self.gamma = gamma
        self.reduction = reduction

    def forward(self, inputs, targets):
        """Return the focal loss of *inputs* against *targets*.

        Both arguments are passed straight to ``F.cross_entropy``, so its
        input/target conventions apply.
        """
        ce_loss = F.cross_entropy(inputs, targets, reduction='none')
        pt = torch.exp(-ce_loss)
        focal_loss = self.alpha * (1 - pt) ** self.gamma * ce_loss

        if self.reduction == 'mean':
            return focal_loss.mean()
        if self.reduction == 'sum':
            return focal_loss.sum()
        return focal_loss