lgsilvaesilva's picture
Upload folder using huggingface_hub
2bde1fa verified
{
"created_at": "2026-05-19T20:13:44.207534+00:00",
"config": {
"hf_dataset": "faodl/amis-agri-soybeans",
"hf_subset": null,
"train_split": "train",
"validation_split": "validation",
"test_split": "test",
"text_col": "chunk_text",
"label_col": "label",
"group_col": "id",
"id_col": "chunk_id",
"model_name": "distilbert/distilbert-base-multilingual-cased",
"output_dir": "/content/agri-soybeans-classifier",
"max_length": 256,
"learning_rate": 2e-05,
"weight_decay": 0.01,
"num_train_epochs": 5.0,
"per_device_train_batch_size": 16,
"per_device_eval_batch_size": 32,
"gradient_accumulation_steps": 1,
"warmup_ratio": 0.1,
"early_stopping_patience": 2,
"seed": 42,
"metric_for_best_model": "f1",
"skip_transformer": false,
"skip_baselines": false,
"baseline_models": [
"logistic",
"xgboost",
"embedding-logistic",
"embedding-svm",
"embedding-lightgbm"
],
"tfidf_max_features": 50000,
"embedding_model_name": "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2",
"embedding_batch_size": 64,
"positive_label_name": "RELEVANT",
"negative_label_name": "NOT_RELEVANT",
"push_to_hub": true,
"hub_model_id": "faodl/agri-soybeans-classifier",
"hub_private_repo": false
},
"dataset_summary": {
"train": {
"rows": 4745,
"labels": {
"0": 3860,
"1": 885
},
"unique_groups": 2244,
"text_length_mean": 702.3989462592202,
"text_length_median": 794.0
},
"validation": {
"rows": 1034,
"labels": {
"0": 782,
"1": 252
},
"unique_groups": 481,
"text_length_mean": 710.2852998065764,
"text_length_median": 795.0
},
"test": {
"rows": 1074,
"labels": {
"0": 889,
"1": 185
},
"unique_groups": 482,
"text_length_mean": 708.6173184357542,
"text_length_median": 794.0
}
},
"results": [
{
"model_type": "logistic_tfidf",
"model_name": "logistic",
"artifact_dir": "/content/agri-soybeans-classifier/baselines/logistic",
"artifact_file": "/content/agri-soybeans-classifier/baselines/logistic/logistic_tfidf.joblib",
"validation_best_threshold": {
"threshold": 0.45412653657308116,
"f1": 0.8698884758364314,
"precision": 0.8181818181818182,
"recall": 0.9285714285714286
},
"test_default_0_5": {
"threshold": 0.5,
"accuracy": 0.9441340782122905,
"precision": 0.8048780487804879,
"recall": 0.8918918918918919,
"f1": 0.8461538461538461,
"confusion_matrix": [
[
849,
40
],
[
20,
165
]
],
"classification_report": {
"NOT_RELEVANT": {
"precision": 0.9769850402761795,
"recall": 0.9550056242969629,
"f1-score": 0.9658703071672355,
"support": 889.0
},
"RELEVANT": {
"precision": 0.8048780487804879,
"recall": 0.8918918918918919,
"f1-score": 0.8461538461538461,
"support": 185.0
},
"accuracy": 0.9441340782122905,
"macro avg": {
"precision": 0.8909315445283337,
"recall": 0.9234487580944274,
"f1-score": 0.9060120766605408,
"support": 1074.0
},
"weighted avg": {
"precision": 0.9473390501209625,
"recall": 0.9441340782122905,
"f1-score": 0.9452487566202364,
"support": 1074.0
}
},
"roc_auc": 0.9665278326695649,
"average_precision": 0.9143444807696574
},
"test_optimal_threshold": {
"threshold": 0.45412653657308116,
"accuracy": 0.9413407821229051,
"precision": 0.7850467289719626,
"recall": 0.9081081081081082,
"f1": 0.8421052631578947,
"confusion_matrix": [
[
843,
46
],
[
17,
168
]
],
"classification_report": {
"NOT_RELEVANT": {
"precision": 0.9802325581395349,
"recall": 0.9482564679415073,
"f1-score": 0.9639794168096055,
"support": 889.0
},
"RELEVANT": {
"precision": 0.7850467289719626,
"recall": 0.9081081081081082,
"f1-score": 0.8421052631578947,
"support": 185.0
},
"accuracy": 0.9413407821229051,
"macro avg": {
"precision": 0.8826396435557488,
"recall": 0.9281822880248077,
"f1-score": 0.9030423399837502,
"support": 1074.0
},
"weighted avg": {
"precision": 0.9466111629849717,
"recall": 0.9413407821229051,
"f1-score": 0.9429861966740687,
"support": 1074.0
}
},
"roc_auc": 0.9665278326695649,
"average_precision": 0.9143444807696574
}
},
{
"model_type": "xgboost_tfidf",
"model_name": "xgboost",
"artifact_dir": "/content/agri-soybeans-classifier/baselines/xgboost",
"artifact_file": "/content/agri-soybeans-classifier/baselines/xgboost/xgboost_tfidf.joblib",
"validation_best_threshold": {
"threshold": 0.5492749810218811,
"f1": 0.9000000000000001,
"precision": 0.907258064516129,
"recall": 0.8928571428571429
},
"test_default_0_5": {
"threshold": 0.5,
"accuracy": 0.9543761638733705,
"precision": 0.8953488372093024,
"recall": 0.8324324324324325,
"f1": 0.8627450980392157,
"confusion_matrix": [
[
871,
18
],
[
31,
154
]
],
"classification_report": {
"NOT_RELEVANT": {
"precision": 0.9656319290465631,
"recall": 0.9797525309336333,
"f1-score": 0.972640982691234,
"support": 889.0
},
"RELEVANT": {
"precision": 0.8953488372093024,
"recall": 0.8324324324324325,
"f1-score": 0.8627450980392157,
"support": 185.0
},
"accuracy": 0.9543761638733705,
"macro avg": {
"precision": 0.9304903831279328,
"recall": 0.9060924816830329,
"f1-score": 0.9176930403652248,
"support": 1074.0
},
"weighted avg": {
"precision": 0.9535254374358618,
"recall": 0.9543761638733705,
"f1-score": 0.9537110584262215,
"support": 1074.0
}
},
"roc_auc": 0.9635028729516919,
"average_precision": 0.8960212274222826
},
"test_optimal_threshold": {
"threshold": 0.5492749810218811,
"accuracy": 0.9553072625698324,
"precision": 0.9053254437869822,
"recall": 0.827027027027027,
"f1": 0.864406779661017,
"confusion_matrix": [
[
873,
16
],
[
32,
153
]
],
"classification_report": {
"NOT_RELEVANT": {
"precision": 0.9646408839779006,
"recall": 0.9820022497187851,
"f1-score": 0.9732441471571907,
"support": 889.0
},
"RELEVANT": {
"precision": 0.9053254437869822,
"recall": 0.827027027027027,
"f1-score": 0.864406779661017,
"support": 185.0
},
"accuracy": 0.9553072625698324,
"macro avg": {
"precision": 0.9349831638824414,
"recall": 0.9045146383729061,
"f1-score": 0.9188254634091038,
"support": 1074.0
},
"weighted avg": {
"precision": 0.9544236061051633,
"recall": 0.9553072625698324,
"f1-score": 0.9544965559218163,
"support": 1074.0
}
},
"roc_auc": 0.9635028729516919,
"average_precision": 0.8960212274222826
}
},
{
"model_type": "embedding-logistic_sentence_embeddings",
"model_name": "logistic",
"embedding_model_name": "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2",
"artifact_dir": "/content/agri-soybeans-classifier/baselines/embedding-logistic",
"artifact_file": "/content/agri-soybeans-classifier/baselines/embedding-logistic/embedding-logistic.joblib",
"validation_best_threshold": {
"threshold": 0.6469514995226564,
"f1": 0.8506616257088847,
"precision": 0.8122743682310469,
"recall": 0.8928571428571429
},
"test_default_0_5": {
"threshold": 0.5,
"accuracy": 0.9385474860335196,
"precision": 0.7531914893617021,
"recall": 0.9567567567567568,
"f1": 0.8428571428571429,
"confusion_matrix": [
[
831,
58
],
[
8,
177
]
],
"classification_report": {
"NOT_RELEVANT": {
"precision": 0.9904648390941597,
"recall": 0.9347581552305961,
"f1-score": 0.9618055555555556,
"support": 889.0
},
"RELEVANT": {
"precision": 0.7531914893617021,
"recall": 0.9567567567567568,
"f1-score": 0.8428571428571429,
"support": 185.0
},
"accuracy": 0.9385474860335196,
"macro avg": {
"precision": 0.8718281642279309,
"recall": 0.9457574559936764,
"f1-score": 0.9023313492063492,
"support": 1074.0
},
"weighted avg": {
"precision": 0.9495937313655706,
"recall": 0.9385474860335196,
"f1-score": 0.941316303833762,
"support": 1074.0
}
},
"roc_auc": 0.98785151856018,
"average_precision": 0.951385005586427
},
"test_optimal_threshold": {
"threshold": 0.6469514995226564,
"accuracy": 0.9543761638733705,
"precision": 0.8366336633663366,
"recall": 0.9135135135135135,
"f1": 0.8733850129198967,
"confusion_matrix": [
[
856,
33
],
[
16,
169
]
],
"classification_report": {
"NOT_RELEVANT": {
"precision": 0.981651376146789,
"recall": 0.9628796400449944,
"f1-score": 0.9721749006246451,
"support": 889.0
},
"RELEVANT": {
"precision": 0.8366336633663366,
"recall": 0.9135135135135135,
"f1-score": 0.8733850129198967,
"support": 185.0
},
"accuracy": 0.9543761638733705,
"macro avg": {
"precision": 0.9091425197565628,
"recall": 0.9381965767792539,
"f1-score": 0.9227799567722709,
"support": 1074.0
},
"weighted avg": {
"precision": 0.9566716025300445,
"recall": 0.9543761638733705,
"f1-score": 0.9551580205265273,
"support": 1074.0
}
},
"roc_auc": 0.98785151856018,
"average_precision": 0.951385005586427
}
},
{
"model_type": "embedding-svm_sentence_embeddings",
"model_name": "svm",
"embedding_model_name": "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2",
"artifact_dir": "/content/agri-soybeans-classifier/baselines/embedding-svm",
"artifact_file": "/content/agri-soybeans-classifier/baselines/embedding-svm/embedding-svm.joblib",
"validation_best_threshold": {
"threshold": 0.37877199430477043,
"f1": 0.8403041825095057,
"precision": 0.8065693430656934,
"recall": 0.876984126984127
},
"test_default_0_5": {
"threshold": 0.5,
"accuracy": 0.957169459962756,
"precision": 0.8839779005524862,
"recall": 0.8648648648648649,
"f1": 0.8743169398907104,
"confusion_matrix": [
[
868,
21
],
[
25,
160
]
],
"classification_report": {
"NOT_RELEVANT": {
"precision": 0.9720044792833147,
"recall": 0.9763779527559056,
"f1-score": 0.9741863075196409,
"support": 889.0
},
"RELEVANT": {
"precision": 0.8839779005524862,
"recall": 0.8648648648648649,
"f1-score": 0.8743169398907104,
"support": 185.0
},
"accuracy": 0.957169459962756,
"macro avg": {
"precision": 0.9279911899179004,
"recall": 0.9206214088103852,
"f1-score": 0.9242516237051757,
"support": 1074.0
},
"weighted avg": {
"precision": 0.956841614231915,
"recall": 0.957169459962756,
"f1-score": 0.9569834834867246,
"support": 1074.0
}
},
"roc_auc": 0.98772383181832,
"average_precision": 0.9492625541246242
},
"test_optimal_threshold": {
"threshold": 0.37877199430477043,
"accuracy": 0.9553072625698324,
"precision": 0.8477157360406091,
"recall": 0.9027027027027027,
"f1": 0.8743455497382199,
"confusion_matrix": [
[
859,
30
],
[
18,
167
]
],
"classification_report": {
"NOT_RELEVANT": {
"precision": 0.9794754846066135,
"recall": 0.9662542182227222,
"f1-score": 0.9728199320498301,
"support": 889.0
},
"RELEVANT": {
"precision": 0.8477157360406091,
"recall": 0.9027027027027027,
"f1-score": 0.8743455497382199,
"support": 185.0
},
"accuracy": 0.9553072625698324,
"macro avg": {
"precision": 0.9135956103236114,
"recall": 0.9344784604627124,
"f1-score": 0.9235827408940249,
"support": 1074.0
},
"weighted avg": {
"precision": 0.9567794385314637,
"recall": 0.9553072625698324,
"f1-score": 0.9558573987838637,
"support": 1074.0
}
},
"roc_auc": 0.98772383181832,
"average_precision": 0.9492625541246242
}
},
{
"model_type": "embedding-lightgbm_sentence_embeddings",
"model_name": "lightgbm",
"embedding_model_name": "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2",
"artifact_dir": "/content/agri-soybeans-classifier/baselines/embedding-lightgbm",
"artifact_file": "/content/agri-soybeans-classifier/baselines/embedding-lightgbm/embedding-lightgbm.joblib",
"validation_best_threshold": {
"threshold": 0.42851684417216673,
"f1": 0.8473895582329317,
"precision": 0.8577235772357723,
"recall": 0.8373015873015873
},
"test_default_0_5": {
"threshold": 0.5,
"accuracy": 0.9590316573556797,
"precision": 0.8938547486033519,
"recall": 0.8648648648648649,
"f1": 0.8791208791208791,
"confusion_matrix": [
[
870,
19
],
[
25,
160
]
],
"classification_report": {
"NOT_RELEVANT": {
"precision": 0.9720670391061452,
"recall": 0.9786276715410573,
"f1-score": 0.9753363228699552,
"support": 889.0
},
"RELEVANT": {
"precision": 0.8938547486033519,
"recall": 0.8648648648648649,
"f1-score": 0.8791208791208791,
"support": 185.0
},
"accuracy": 0.9590316573556797,
"macro avg": {
"precision": 0.9329608938547486,
"recall": 0.9217462682029611,
"f1-score": 0.9272286009954172,
"support": 1074.0
},
"weighted avg": {
"precision": 0.9585947171852729,
"recall": 0.9590316573556797,
"f1-score": 0.9587628991329169,
"support": 1074.0
}
},
"roc_auc": 0.9852369805125711,
"average_precision": 0.9499432525904906
},
"test_optimal_threshold": {
"threshold": 0.42851684417216673,
"accuracy": 0.9590316573556797,
"precision": 0.8895027624309392,
"recall": 0.8702702702702703,
"f1": 0.8797814207650273,
"confusion_matrix": [
[
869,
20
],
[
24,
161
]
],
"classification_report": {
"NOT_RELEVANT": {
"precision": 0.973124300111982,
"recall": 0.9775028121484814,
"f1-score": 0.9753086419753086,
"support": 889.0
},
"RELEVANT": {
"precision": 0.8895027624309392,
"recall": 0.8702702702702703,
"f1-score": 0.8797814207650273,
"support": 185.0
},
"accuracy": 0.9590316573556797,
"macro avg": {
"precision": 0.9313135312714607,
"recall": 0.9238865412093759,
"f1-score": 0.927545031370168,
"support": 1074.0
},
"weighted avg": {
"precision": 0.9587202177367556,
"recall": 0.9590316573556797,
"f1-score": 0.9588537668133889,
"support": 1074.0
}
},
"roc_auc": 0.9852369805125711,
"average_precision": 0.9499432525904906
}
},
{
"model_type": "transformer",
"model_name": "distilbert/distilbert-base-multilingual-cased",
"artifact_dir": "/content/agri-soybeans-classifier/transformer",
"validation_best_threshold": {
"threshold": 0.4926762878894806,
"f1": 0.9236790606653621,
"precision": 0.9111969111969112,
"recall": 0.9365079365079365
},
"test_default_0_5": {
"threshold": 0.5,
"accuracy": 0.9543761638733705,
"precision": 0.8820224719101124,
"recall": 0.8486486486486486,
"f1": 0.8650137741046832,
"confusion_matrix": [
[
868,
21
],
[
28,
157
]
],
"classification_report": {
"NOT_RELEVANT": {
"precision": 0.96875,
"recall": 0.9763779527559056,
"f1-score": 0.9725490196078431,
"support": 889.0
},
"RELEVANT": {
"precision": 0.8820224719101124,
"recall": 0.8486486486486486,
"f1-score": 0.8650137741046832,
"support": 185.0
},
"accuracy": 0.9543761638733705,
"macro avg": {
"precision": 0.9253862359550562,
"recall": 0.9125133007022771,
"f1-score": 0.9187813968562631,
"support": 1074.0
},
"weighted avg": {
"precision": 0.9538109006549076,
"recall": 0.9543761638733705,
"f1-score": 0.9540257231291798,
"support": 1074.0
}
},
"roc_auc": 0.9756270331073482,
"average_precision": 0.9289986926848062
},
"test_optimal_threshold": {
"threshold": 0.4926762878894806,
"accuracy": 0.9553072625698324,
"precision": 0.88268156424581,
"recall": 0.8540540540540541,
"f1": 0.8681318681318682,
"confusion_matrix": [
[
868,
21
],
[
27,
158
]
],
"classification_report": {
"NOT_RELEVANT": {
"precision": 0.9698324022346368,
"recall": 0.9763779527559056,
"f1-score": 0.9730941704035875,
"support": 889.0
},
"RELEVANT": {
"precision": 0.88268156424581,
"recall": 0.8540540540540541,
"f1-score": 0.8681318681318682,
"support": 185.0
},
"accuracy": 0.9553072625698324,
"macro avg": {
"precision": 0.9262569832402234,
"recall": 0.9152160034049799,
"f1-score": 0.9206130192677278,
"support": 1074.0
},
"weighted avg": {
"precision": 0.9548203863799507,
"recall": 0.9553072625698324,
"f1-score": 0.9550140717813639,
"support": 1074.0
}
},
"roc_auc": 0.9756270331073482,
"average_precision": 0.9289986926848062
}
}
]
}