Refactor code structure for improved readability and maintainability
Browse files
- Classification_Reports/ESMC-300m - Random Forest_classification_report.csv +4 -4
- Classification_Reports/ESMC-600m - Random Forest_classification_report.csv +4 -4
- Classification_Reports/Prost T5 - Random Forest_classification_report.csv +4 -4
- Classification_Reports/results_ESM300_RF.csv +0 -10
- Classification_Reports/results_ESM300_SVM.csv +0 -10
- Classification_Reports/results_ESM600_RF.csv +0 -10
- Classification_Reports/results_ESM600_SVM.csv +0 -10
- Classification_Reports/results_PROST_RF.csv +0 -10
- Classification_Reports/results_PROST_SVM.csv +0 -10
- Data/evaluations.csv +7 -7
- Models/ESMC-300m_rf.joblib +2 -2
- Models/ESMC-300m_svm.joblib +1 -1
- Models/ESMC-600m_rf.joblib +2 -2
- Models/ESMC-600m_svm.joblib +1 -1
- Models/Prost T5_rf.joblib +2 -2
- Models/Prost T5_svm.joblib +1 -1
- Plots/ESMC-300m - Random Forest_confusion_matrix.png +0 -0
- Plots/ESMC-300m - Support Vector Machine_confusion_matrix.png +0 -0
- Plots/ESMC-600m - Random Forest_confusion_matrix.png +0 -0
- Plots/ESMC-600m - Support Vector Machine_confusion_matrix.png +0 -0
- Plots/Prost T5 - Random Forest_confusion_matrix.png +0 -0
- Plots/Prost T5 - Support Vector Machine_confusion_matrix.png +0 -0
- notebooks/04_Training.ipynb +2 -2
- src/my_utils.py +17 -6
Classification_Reports/ESMC-300m - Random Forest_classification_report.csv
CHANGED
|
@@ -1,5 +1,5 @@
|
|
| 1 |
,Cellwall,Cytoplasmic,CytoplasmicMembrane,Extracellular,OuterMembrane,Periplasmic,accuracy,macro avg,weighted avg
|
| 2 |
-
precision,0.
|
| 3 |
-
recall,0.
|
| 4 |
-
f1-score,0.
|
| 5 |
-
support,31.0,2390.0,650.0,263.0,183.0,160.0,0.
|
|
|
|
| 1 |
,Cellwall,Cytoplasmic,CytoplasmicMembrane,Extracellular,OuterMembrane,Periplasmic,accuracy,macro avg,weighted avg
|
| 2 |
+
precision,0.7692307692307693,0.9547456948338006,0.9548611111111112,0.8941176470588236,0.9316770186335404,0.8024691358024691,0.9415284199075333,0.8845168961117523,0.9410913744998538
|
| 3 |
+
recall,0.6451612903225806,0.9974895397489539,0.8461538461538461,0.8669201520912547,0.819672131147541,0.8125,0.9415284199075333,0.8313161599106961,0.9415284199075333
|
| 4 |
+
f1-score,0.7017543859649122,0.9756496828320033,0.8972267536704731,0.8803088803088803,0.872093023255814,0.8074534161490683,0.9415284199075333,0.8557476903635252,0.9401852932227653
|
| 5 |
+
support,31.0,2390.0,650.0,263.0,183.0,160.0,0.9415284199075333,3677.0,3677.0
|
Classification_Reports/ESMC-600m - Random Forest_classification_report.csv
CHANGED
|
@@ -1,5 +1,5 @@
|
|
| 1 |
,Cellwall,Cytoplasmic,CytoplasmicMembrane,Extracellular,OuterMembrane,Periplasmic,accuracy,macro avg,weighted avg
|
| 2 |
-
precision,0.
|
| 3 |
-
recall,0.
|
| 4 |
-
f1-score,0.
|
| 5 |
-
support,31.0,2390.0,650.0,263.0,183.0,160.0,0.
|
|
|
|
| 1 |
,Cellwall,Cytoplasmic,CytoplasmicMembrane,Extracellular,OuterMembrane,Periplasmic,accuracy,macro avg,weighted avg
|
| 2 |
+
precision,0.8095238095238095,0.9662189662189662,0.9689119170984456,0.8368794326241135,0.9629629629629629,0.7897727272727273,0.9472395974979603,0.8890449692835043,0.9482829651451068
|
| 3 |
+
recall,0.5483870967741935,0.9933054393305439,0.8630769230769231,0.8973384030418251,0.8524590163934426,0.86875,0.9472395974979603,0.8372194797694882,0.9472395974979603
|
| 4 |
+
f1-score,0.6538461538461539,0.9795749948421704,0.9129373474369405,0.8660550458715597,0.9043478260869565,0.8273809523809523,0.9472395974979603,0.8573570534107889,0.9465629115842034
|
| 5 |
+
support,31.0,2390.0,650.0,263.0,183.0,160.0,0.9472395974979603,3677.0,3677.0
|
Classification_Reports/Prost T5 - Random Forest_classification_report.csv
CHANGED
|
@@ -1,5 +1,5 @@
|
|
| 1 |
,Cellwall,Cytoplasmic,CytoplasmicMembrane,Extracellular,OuterMembrane,Periplasmic,accuracy,macro avg,weighted avg
|
| 2 |
-
precision,0.
|
| 3 |
-
recall,0.5806451612903226,0.
|
| 4 |
-
f1-score,0.
|
| 5 |
-
support,31.0,2390.0,650.0,263.0,183.0,160.0,0.
|
|
|
|
| 1 |
,Cellwall,Cytoplasmic,CytoplasmicMembrane,Extracellular,OuterMembrane,Periplasmic,accuracy,macro avg,weighted avg
|
| 2 |
+
precision,0.6428571428571429,0.9832285115303984,0.9485530546623794,0.7954545454545454,0.9578313253012049,0.7619047619047619,0.9477835191732391,0.8483048902850722,0.9499038997569209
|
| 3 |
+
recall,0.5806451612903226,0.9811715481171548,0.9076923076923077,0.9315589353612167,0.8688524590163934,0.8,0.9477835191732391,0.8449867352462325,0.9477835191732391
|
| 4 |
+
f1-score,0.6101694915254238,0.9821989528795811,0.9276729559748428,0.8581436077057794,0.9111747851002865,0.7804878048780488,0.9477835191732391,0.8449745996773269,0.9482385032046764
|
| 5 |
+
support,31.0,2390.0,650.0,263.0,183.0,160.0,0.9477835191732391,3677.0,3677.0
|
Classification_Reports/results_ESM300_RF.csv
DELETED
|
@@ -1,10 +0,0 @@
|
|
| 1 |
-
,precision,recall,f1-score,support
|
| 2 |
-
Cellwall,0.8842105263157894,0.8842105263157894,0.8842105263157894,95.0
|
| 3 |
-
Cytoplasmic,0.9852189528940462,0.9852189528940462,0.9852189528940462,7239.0
|
| 4 |
-
CytoplasmicMembrane,0.9573234984193888,0.9228034535297105,0.9397465735712438,1969.0
|
| 5 |
-
Extracellular,0.9062870699881376,0.958594730238394,0.9317073170731708,797.0
|
| 6 |
-
OuterMembrane,0.9796296296296296,0.9531531531531532,0.9662100456621004,555.0
|
| 7 |
-
Periplasmic,0.878095238095238,0.9505154639175257,0.9128712871287129,485.0
|
| 8 |
-
accuracy,0.9683123877917414,0.9683123877917414,0.9683123877917414,0.9683123877917414
|
| 9 |
-
macro avg,0.9317941525570382,0.9424160466747699,0.9366607837741773,11140.0
|
| 10 |
-
weighted avg,0.9688376479433536,0.9683123877917414,0.9683950524837511,11140.0
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Classification_Reports/results_ESM300_SVM.csv
DELETED
|
@@ -1,10 +0,0 @@
|
|
| 1 |
-
,precision,recall,f1-score,support
|
| 2 |
-
Cellwall,0.9540229885057471,0.8736842105263158,0.9120879120879121,95.0
|
| 3 |
-
Cytoplasmic,0.9931299807639461,0.9984804531012571,0.995798029895984,7239.0
|
| 4 |
-
CytoplasmicMembrane,0.990726429675425,0.9766378872524124,0.9836317135549872,1969.0
|
| 5 |
-
Extracellular,0.9672955974842767,0.9648682559598495,0.9660804020100503,797.0
|
| 6 |
-
OuterMembrane,0.9815157116451017,0.9567567567567568,0.968978102189781,555.0
|
| 7 |
-
Periplasmic,0.9357429718875502,0.9608247422680413,0.948118006103764,485.0
|
| 8 |
-
accuracy,0.9874326750448833,0.9874326750448833,0.9874326750448833,0.9874326750448833
|
| 9 |
-
macro avg,0.9704056133270078,0.9552087176441054,0.962449027640413,11140.0
|
| 10 |
-
weighted avg,0.9874462843099304,0.9874326750448833,0.9873956278395704,11140.0
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Classification_Reports/results_ESM600_RF.csv
DELETED
|
@@ -1,10 +0,0 @@
|
|
| 1 |
-
,precision,recall,f1-score,support
|
| 2 |
-
Cellwall,0.8913043478260869,0.8631578947368421,0.8770053475935828,95.0
|
| 3 |
-
Cytoplasmic,0.9903846153846154,0.995993921812405,0.9931813485777258,7239.0
|
| 4 |
-
CytoplasmicMembrane,0.9894902785076195,0.9563230066023362,0.9726239669421488,1969.0
|
| 5 |
-
Extracellular,0.9381067961165048,0.9698870765370138,0.9537322640345466,797.0
|
| 6 |
-
OuterMembrane,0.9744058500914077,0.9603603603603603,0.9673321234119783,555.0
|
| 7 |
-
Periplasmic,0.9331983805668016,0.9505154639175257,0.9417773237997957,485.0
|
| 8 |
-
accuracy,0.9822262118491921,0.9822262118491921,0.9822262118491921,0.9822262118491921
|
| 9 |
-
macro avg,0.9528150447488395,0.9493729539944139,0.9509420623932964,11140.0
|
| 10 |
-
weighted avg,0.9823556624842636,0.9822262118491921,0.9822089610643375,11140.0
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Classification_Reports/results_ESM600_SVM.csv
DELETED
|
@@ -1,10 +0,0 @@
|
|
| 1 |
-
,precision,recall,f1-score,support
|
| 2 |
-
Cellwall,0.9647058823529412,0.8631578947368421,0.9111111111111111,95.0
|
| 3 |
-
Cytoplasmic,0.9927207801126219,0.9984804531012571,0.9955922865013774,7239.0
|
| 4 |
-
CytoplasmicMembrane,0.986659825551565,0.9766378872524124,0.9816232771822359,1969.0
|
| 5 |
-
Extracellular,0.9636135508155583,0.9636135508155583,0.9636135508155583,797.0
|
| 6 |
-
OuterMembrane,0.9833948339483395,0.9603603603603603,0.9717411121239745,555.0
|
| 7 |
-
Periplasmic,0.9465020576131687,0.9484536082474226,0.9474768280123584,485.0
|
| 8 |
-
accuracy,0.9868940754039497,0.9868940754039497,0.9868940754039497,0.9868940754039497
|
| 9 |
-
macro avg,0.9729328217323658,0.9517839590856422,0.9618596942911025,11140.0
|
| 10 |
-
weighted avg,0.986851311791162,0.9868940754039497,0.9868318607832719,11140.0
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Classification_Reports/results_PROST_RF.csv
DELETED
|
@@ -1,10 +0,0 @@
|
|
| 1 |
-
,precision,recall,f1-score,support
|
| 2 |
-
Cellwall,0.9529411764705882,0.8526315789473684,0.9,95.0
|
| 3 |
-
Cytoplasmic,0.9911966987620358,0.9954413593037712,0.9933144944517196,7239.0
|
| 4 |
-
CytoplasmicMembrane,0.9845360824742269,0.9700355510411376,0.9772320286518291,1969.0
|
| 5 |
-
Extracellular,0.9207100591715977,0.9761606022584692,0.9476248477466505,797.0
|
| 6 |
-
OuterMembrane,0.9887850467289719,0.9531531531531532,0.9706422018348624,555.0
|
| 7 |
-
Periplasmic,0.9505376344086022,0.911340206185567,0.9305263157894736,485.0
|
| 8 |
-
accuracy,0.9825852782764811,0.9825852782764811,0.9825852782764811,0.9825852782764811
|
| 9 |
-
macro avg,0.9647844496693371,0.9431270751482446,0.9532233147457557,11140.0
|
| 10 |
-
weighted avg,0.9827599848543402,0.9825852782764811,0.9825441812012363,11140.0
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Classification_Reports/results_PROST_SVM.csv
DELETED
|
@@ -1,10 +0,0 @@
|
|
| 1 |
-
,precision,recall,f1-score,support
|
| 2 |
-
Cellwall,0.9761904761904762,0.8631578947368421,0.9162011173184358,95.0
|
| 3 |
-
Cytoplasmic,0.9921638713225186,0.9969609062025142,0.9945566044236201,7239.0
|
| 4 |
-
CytoplasmicMembrane,0.9886889460154241,0.9766378872524124,0.9826264690853347,1969.0
|
| 5 |
-
Extracellular,0.9616336633663366,0.9749058971141782,0.9682242990654205,797.0
|
| 6 |
-
OuterMembrane,0.9834862385321101,0.9657657657657658,0.9745454545454545,555.0
|
| 7 |
-
Periplasmic,0.9442148760330579,0.9422680412371134,0.9432404540763674,485.0
|
| 8 |
-
accuracy,0.9867145421903052,0.9867145421903052,0.9867145421903052,0.9867145421903052
|
| 9 |
-
macro avg,0.9743963452433206,0.9532827320514711,0.9632323997524388,11140.0
|
| 10 |
-
weighted avg,0.9867093358537257,0.9867145421903052,0.9866647214588659,11140.0
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Data/evaluations.csv
CHANGED
|
@@ -1,7 +1,7 @@
|
|
| 1 |
-
model,
|
| 2 |
-
Prost T5_rf,0.
|
| 3 |
-
Prost T5_svm,0.9597497960293717,0.
|
| 4 |
-
ESMC-300m_rf,0.
|
| 5 |
-
ESMC-300m_svm,0.9621974435681262,0.
|
| 6 |
-
ESMC-600m_rf,0.
|
| 7 |
-
ESMC-600m_svm,0.9602937177046506,0.
|
|
|
|
| 1 |
+
model,Recall_weighted,Precision_weighted,F1_weighted,Recall_micro,Precision_micro,F1_micro,Recall_macro,Precision_macro,F1_macro
|
| 2 |
+
Prost T5_rf,0.9477835191732391,0.9499038997569209,0.9482385032046764,0.8449867352462325,0.8483048902850722,0.8449745996773269,0.8449867352462325,0.8483048902850722,0.8449745996773269
|
| 3 |
+
Prost T5_svm,0.9597497960293717,0.9595957881278095,0.959225689183014,0.8576333576020237,0.917729657853231,0.8816910350147221,0.8576333576020237,0.917729657853231,0.8816910350147221
|
| 4 |
+
ESMC-300m_rf,0.9415284199075333,0.9410913744998538,0.9401852932227653,0.8313161599106961,0.8845168961117523,0.8557476903635252,0.8313161599106961,0.8845168961117523,0.8557476903635252
|
| 5 |
+
ESMC-300m_svm,0.9621974435681262,0.9622014817178194,0.961806189217868,0.8635980792180346,0.9057293449016118,0.8814818006164681,0.8635980792180346,0.9057293449016118,0.8814818006164681
|
| 6 |
+
ESMC-600m_rf,0.9472395974979603,0.9482829651451068,0.9465629115842034,0.8372194797694882,0.8890449692835043,0.8573570534107889,0.8372194797694882,0.8890449692835043,0.8573570534107889
|
| 7 |
+
ESMC-600m_svm,0.9602937177046506,0.9597863973858514,0.9596645033195284,0.8530897801883417,0.9115637055531502,0.8776369716697019,0.8530897801883417,0.9115637055531502,0.8776369716697019
|
Models/ESMC-300m_rf.joblib
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6f753d32b23ac48f1f3f3084ae127c7cdeae04bc8965ab5c182bd97f6b5f0446
|
| 3 |
+
size 7672329
|
Models/ESMC-300m_svm.joblib
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 18294469
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b5270c6c0a57613aeb214822256ac586f636432aac662d592148af0ec0519ae5
|
| 3 |
size 18294469
|
Models/ESMC-600m_rf.joblib
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3d378d8fd0fa931f5e99d8ac3359911974b1801481eabefca5d616a69004a29f
|
| 3 |
+
size 4371481
|
Models/ESMC-600m_svm.joblib
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 22787493
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f603530e3336b61445344f497dabcb7e2c0c115b2760f98d614e556f13d6eaad
|
| 3 |
size 22787493
|
Models/Prost T5_rf.joblib
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:503b4c0ffcc0e89abb9c319ae0b7c2e983490676dd3cd14f74a17de37357f131
|
| 3 |
+
size 22161465
|
Models/Prost T5_svm.joblib
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 18267605
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4aff572ed367bbfce4ed874058a06a987cdf5c0c629ab98bde1af651d97765ee
|
| 3 |
size 18267605
|
Plots/ESMC-300m - Random Forest_confusion_matrix.png
ADDED
|
Plots/ESMC-300m - Support Vector Machine_confusion_matrix.png
ADDED
|
Plots/ESMC-600m - Random Forest_confusion_matrix.png
ADDED
|
Plots/ESMC-600m - Support Vector Machine_confusion_matrix.png
ADDED
|
Plots/Prost T5 - Random Forest_confusion_matrix.png
ADDED
|
Plots/Prost T5 - Support Vector Machine_confusion_matrix.png
ADDED
|
notebooks/04_Training.ipynb
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:79f5956b989bbe058168dec81b42ee1d48a05ba0accace96af4bd6e8c86d7f63
|
| 3 |
+
size 728565
|
src/my_utils.py
CHANGED
|
@@ -261,10 +261,18 @@ def evaluate(model: BaseEstimator,
|
|
| 261 |
result = {}
|
| 262 |
y_pred = model.predict(x_test) # type: ignore
|
| 263 |
|
| 264 |
-
|
| 265 |
-
result['
|
| 266 |
-
result['
|
| 267 |
-
result['
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 268 |
|
| 269 |
pprint(result)
|
| 270 |
return result
|
|
@@ -324,7 +332,9 @@ def train_rf(title: str,
|
|
| 324 |
y_pred_str = le.inverse_transform(y_pred)
|
| 325 |
y_test_str = le.inverse_transform(y_test)
|
| 326 |
|
| 327 |
-
confusion(title=title, y_true=y_test_str, y_pred=y_pred_str)
|
|
|
|
|
|
|
| 328 |
|
| 329 |
return classifier, evaluation, le
|
| 330 |
|
|
@@ -371,7 +381,8 @@ def train_svm(title: str, x: np.ndarray, y: np.ndarray, params: dict) -> tuple[P
|
|
| 371 |
y_pred_str = le.inverse_transform(y_pred)
|
| 372 |
y_test_str = le.inverse_transform(y_test)
|
| 373 |
|
| 374 |
-
confusion(title=title, y_true=y_test_str, y_pred=y_pred_str)
|
|
|
|
| 375 |
|
| 376 |
|
| 377 |
classification = classification_report(y_test,
|
|
|
|
| 261 |
result = {}
|
| 262 |
y_pred = model.predict(x_test) # type: ignore
|
| 263 |
|
| 264 |
+
|
| 265 |
+
result['Recall_weighted'] = recall_score(y_test, y_pred, average = 'weighted')
|
| 266 |
+
result['Precision_weighted'] = precision_score(y_test, y_pred, average='weighted')
|
| 267 |
+
result['F1_weighted'] = f1_score(y_test, y_pred, average='weighted')
|
| 268 |
+
|
| 269 |
+
result['Recall_micro'] = recall_score(y_test, y_pred, average = 'macro')
|
| 270 |
+
result['Precision_micro'] = precision_score(y_test, y_pred, average='macro')
|
| 271 |
+
result['F1_micro'] = f1_score(y_test, y_pred, average='macro')
|
| 272 |
+
|
| 273 |
+
result['Recall_macro'] = recall_score(y_test, y_pred, average = 'macro')
|
| 274 |
+
result['Precision_macro'] = precision_score(y_test, y_pred, average='macro')
|
| 275 |
+
result['F1_macro'] = f1_score(y_test, y_pred, average='macro')
|
| 276 |
|
| 277 |
pprint(result)
|
| 278 |
return result
|
|
|
|
| 332 |
y_pred_str = le.inverse_transform(y_pred)
|
| 333 |
y_test_str = le.inverse_transform(y_test)
|
| 334 |
|
| 335 |
+
fig = confusion(title=title, y_true=y_test_str, y_pred=y_pred_str)
|
| 336 |
+
|
| 337 |
+
fig.savefig(os.path.join(project_root, 'Plots', f'{title}_confusion_matrix.png'))
|
| 338 |
|
| 339 |
return classifier, evaluation, le
|
| 340 |
|
|
|
|
| 381 |
y_pred_str = le.inverse_transform(y_pred)
|
| 382 |
y_test_str = le.inverse_transform(y_test)
|
| 383 |
|
| 384 |
+
fig = confusion(title=title, y_true=y_test_str, y_pred=y_pred_str)
|
| 385 |
+
fig.savefig(os.path.join(project_root, 'Plots', f'{title}_confusion_matrix.png'))
|
| 386 |
|
| 387 |
|
| 388 |
classification = classification_report(y_test,
|