{ "created_at": "2026-05-19T20:13:44.207534+00:00", "config": { "hf_dataset": "faodl/amis-agri-soybeans", "hf_subset": null, "train_split": "train", "validation_split": "validation", "test_split": "test", "text_col": "chunk_text", "label_col": "label", "group_col": "id", "id_col": "chunk_id", "model_name": "distilbert/distilbert-base-multilingual-cased", "output_dir": "/content/agri-soybeans-classifier", "max_length": 256, "learning_rate": 2e-05, "weight_decay": 0.01, "num_train_epochs": 5.0, "per_device_train_batch_size": 16, "per_device_eval_batch_size": 32, "gradient_accumulation_steps": 1, "warmup_ratio": 0.1, "early_stopping_patience": 2, "seed": 42, "metric_for_best_model": "f1", "skip_transformer": false, "skip_baselines": false, "baseline_models": [ "logistic", "xgboost", "embedding-logistic", "embedding-svm", "embedding-lightgbm" ], "tfidf_max_features": 50000, "embedding_model_name": "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2", "embedding_batch_size": 64, "positive_label_name": "RELEVANT", "negative_label_name": "NOT_RELEVANT", "push_to_hub": true, "hub_model_id": "faodl/agri-soybeans-classifier", "hub_private_repo": false }, "dataset_summary": { "train": { "rows": 4745, "labels": { "0": 3860, "1": 885 }, "unique_groups": 2244, "text_length_mean": 702.3989462592202, "text_length_median": 794.0 }, "validation": { "rows": 1034, "labels": { "0": 782, "1": 252 }, "unique_groups": 481, "text_length_mean": 710.2852998065764, "text_length_median": 795.0 }, "test": { "rows": 1074, "labels": { "0": 889, "1": 185 }, "unique_groups": 482, "text_length_mean": 708.6173184357542, "text_length_median": 794.0 } }, "results": [ { "model_type": "logistic_tfidf", "model_name": "logistic", "artifact_dir": "/content/agri-soybeans-classifier/baselines/logistic", "artifact_file": "/content/agri-soybeans-classifier/baselines/logistic/logistic_tfidf.joblib", "validation_best_threshold": { "threshold": 0.45412653657308116, "f1": 0.8698884758364314, "precision": 0.8181818181818182, "recall": 0.9285714285714286 }, "test_default_0_5": { "threshold": 0.5, "accuracy": 0.9441340782122905, "precision": 0.8048780487804879, "recall": 0.8918918918918919, "f1": 0.8461538461538461, "confusion_matrix": [ [ 849, 40 ], [ 20, 165 ] ], "classification_report": { "NOT_RELEVANT": { "precision": 0.9769850402761795, "recall": 0.9550056242969629, "f1-score": 0.9658703071672355, "support": 889.0 }, "RELEVANT": { "precision": 0.8048780487804879, "recall": 0.8918918918918919, "f1-score": 0.8461538461538461, "support": 185.0 }, "accuracy": 0.9441340782122905, "macro avg": { "precision": 0.8909315445283337, "recall": 0.9234487580944274, "f1-score": 0.9060120766605408, "support": 1074.0 }, "weighted avg": { "precision": 0.9473390501209625, "recall": 0.9441340782122905, "f1-score": 0.9452487566202364, "support": 1074.0 } }, "roc_auc": 0.9665278326695649, "average_precision": 0.9143444807696574 }, "test_optimal_threshold": { "threshold": 0.45412653657308116, "accuracy": 0.9413407821229051, "precision": 0.7850467289719626, "recall": 0.9081081081081082, "f1": 0.8421052631578947, "confusion_matrix": [ [ 843, 46 ], [ 17, 168 ] ], "classification_report": { "NOT_RELEVANT": { "precision": 0.9802325581395349, "recall": 0.9482564679415073, "f1-score": 0.9639794168096055, "support": 889.0 }, "RELEVANT": { "precision": 0.7850467289719626, "recall": 0.9081081081081082, "f1-score": 0.8421052631578947, "support": 185.0 }, "accuracy": 0.9413407821229051, "macro avg": { "precision": 0.8826396435557488, "recall": 0.9281822880248077, "f1-score": 0.9030423399837502, "support": 1074.0 }, "weighted avg": { "precision": 0.9466111629849717, "recall": 0.9413407821229051, "f1-score": 0.9429861966740687, "support": 1074.0 } }, "roc_auc": 0.9665278326695649, "average_precision": 0.9143444807696574 } }, { "model_type": "xgboost_tfidf", "model_name": "xgboost", "artifact_dir": "/content/agri-soybeans-classifier/baselines/xgboost", "artifact_file": "/content/agri-soybeans-classifier/baselines/xgboost/xgboost_tfidf.joblib", "validation_best_threshold": { "threshold": 0.5492749810218811, "f1": 0.9000000000000001, "precision": 0.907258064516129, "recall": 0.8928571428571429 }, "test_default_0_5": { "threshold": 0.5, "accuracy": 0.9543761638733705, "precision": 0.8953488372093024, "recall": 0.8324324324324325, "f1": 0.8627450980392157, "confusion_matrix": [ [ 871, 18 ], [ 31, 154 ] ], "classification_report": { "NOT_RELEVANT": { "precision": 0.9656319290465631, "recall": 0.9797525309336333, "f1-score": 0.972640982691234, "support": 889.0 }, "RELEVANT": { "precision": 0.8953488372093024, "recall": 0.8324324324324325, "f1-score": 0.8627450980392157, "support": 185.0 }, "accuracy": 0.9543761638733705, "macro avg": { "precision": 0.9304903831279328, "recall": 0.9060924816830329, "f1-score": 0.9176930403652248, "support": 1074.0 }, "weighted avg": { "precision": 0.9535254374358618, "recall": 0.9543761638733705, "f1-score": 0.9537110584262215, "support": 1074.0 } }, "roc_auc": 0.9635028729516919, "average_precision": 0.8960212274222826 }, "test_optimal_threshold": { "threshold": 0.5492749810218811, "accuracy": 0.9553072625698324, "precision": 0.9053254437869822, "recall": 0.827027027027027, "f1": 0.864406779661017, "confusion_matrix": [ [ 873, 16 ], [ 32, 153 ] ], "classification_report": { "NOT_RELEVANT": { "precision": 0.9646408839779006, "recall": 0.9820022497187851, "f1-score": 0.9732441471571907, "support": 889.0 }, "RELEVANT": { "precision": 0.9053254437869822, "recall": 0.827027027027027, "f1-score": 0.864406779661017, "support": 185.0 }, "accuracy": 0.9553072625698324, "macro avg": { "precision": 0.9349831638824414, "recall": 0.9045146383729061, "f1-score": 0.9188254634091038, "support": 1074.0 }, "weighted avg": { "precision": 0.9544236061051633, "recall": 0.9553072625698324, "f1-score": 0.9544965559218163, "support": 1074.0 } }, "roc_auc": 0.9635028729516919, "average_precision": 0.8960212274222826 } }, { "model_type": "embedding-logistic_sentence_embeddings", "model_name": "logistic", "embedding_model_name": "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2", "artifact_dir": "/content/agri-soybeans-classifier/baselines/embedding-logistic", "artifact_file": "/content/agri-soybeans-classifier/baselines/embedding-logistic/embedding-logistic.joblib", "validation_best_threshold": { "threshold": 0.6469514995226564, "f1": 0.8506616257088847, "precision": 0.8122743682310469, "recall": 0.8928571428571429 }, "test_default_0_5": { "threshold": 0.5, "accuracy": 0.9385474860335196, "precision": 0.7531914893617021, "recall": 0.9567567567567568, "f1": 0.8428571428571429, "confusion_matrix": [ [ 831, 58 ], [ 8, 177 ] ], "classification_report": { "NOT_RELEVANT": { "precision": 0.9904648390941597, "recall": 0.9347581552305961, "f1-score": 0.9618055555555556, "support": 889.0 }, "RELEVANT": { "precision": 0.7531914893617021, "recall": 0.9567567567567568, "f1-score": 0.8428571428571429, "support": 185.0 }, "accuracy": 0.9385474860335196, "macro avg": { "precision": 0.8718281642279309, "recall": 0.9457574559936764, "f1-score": 0.9023313492063492, "support": 1074.0 }, "weighted avg": { "precision": 0.9495937313655706, "recall": 0.9385474860335196, "f1-score": 0.941316303833762, "support": 1074.0 } }, "roc_auc": 0.98785151856018, "average_precision": 0.951385005586427 }, "test_optimal_threshold": { "threshold": 0.6469514995226564, "accuracy": 0.9543761638733705, "precision": 0.8366336633663366, "recall": 0.9135135135135135, "f1": 0.8733850129198967, "confusion_matrix": [ [ 856, 33 ], [ 16, 169 ] ], "classification_report": { "NOT_RELEVANT": { "precision": 0.981651376146789, "recall": 0.9628796400449944, "f1-score": 0.9721749006246451, "support": 889.0 }, "RELEVANT": { "precision": 0.8366336633663366, "recall": 0.9135135135135135, "f1-score": 0.8733850129198967, "support": 185.0 }, "accuracy": 0.9543761638733705, "macro avg": { "precision": 0.9091425197565628, "recall": 0.9381965767792539, "f1-score": 0.9227799567722709, "support": 1074.0 }, "weighted avg": { "precision": 0.9566716025300445, "recall": 0.9543761638733705, "f1-score": 0.9551580205265273, "support": 1074.0 } }, "roc_auc": 0.98785151856018, "average_precision": 0.951385005586427 } }, { "model_type": "embedding-svm_sentence_embeddings", "model_name": "svm", "embedding_model_name": "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2", "artifact_dir": "/content/agri-soybeans-classifier/baselines/embedding-svm", "artifact_file": "/content/agri-soybeans-classifier/baselines/embedding-svm/embedding-svm.joblib", "validation_best_threshold": { "threshold": 0.37877199430477043, "f1": 0.8403041825095057, "precision": 0.8065693430656934, "recall": 0.876984126984127 }, "test_default_0_5": { "threshold": 0.5, "accuracy": 0.957169459962756, "precision": 0.8839779005524862, "recall": 0.8648648648648649, "f1": 0.8743169398907104, "confusion_matrix": [ [ 868, 21 ], [ 25, 160 ] ], "classification_report": { "NOT_RELEVANT": { "precision": 0.9720044792833147, "recall": 0.9763779527559056, "f1-score": 0.9741863075196409, "support": 889.0 }, "RELEVANT": { "precision": 0.8839779005524862, "recall": 0.8648648648648649, "f1-score": 0.8743169398907104, "support": 185.0 }, "accuracy": 0.957169459962756, "macro avg": { "precision": 0.9279911899179004, "recall": 0.9206214088103852, "f1-score": 0.9242516237051757, "support": 1074.0 }, "weighted avg": { "precision": 0.956841614231915, "recall": 0.957169459962756, "f1-score": 0.9569834834867246, "support": 1074.0 } }, "roc_auc": 0.98772383181832, "average_precision": 0.9492625541246242 }, "test_optimal_threshold": { "threshold": 0.37877199430477043, "accuracy": 0.9553072625698324, "precision": 0.8477157360406091, "recall": 0.9027027027027027, "f1": 0.8743455497382199, "confusion_matrix": [ [ 859, 30 ], [ 18, 167 ] ], "classification_report": { "NOT_RELEVANT": { "precision": 0.9794754846066135, "recall": 0.9662542182227222, "f1-score": 0.9728199320498301, "support": 889.0 }, "RELEVANT": { "precision": 0.8477157360406091, "recall": 0.9027027027027027, "f1-score": 0.8743455497382199, "support": 185.0 }, "accuracy": 0.9553072625698324, "macro avg": { "precision": 0.9135956103236114, "recall": 0.9344784604627124, "f1-score": 0.9235827408940249, "support": 1074.0 }, "weighted avg": { "precision": 0.9567794385314637, "recall": 0.9553072625698324, "f1-score": 0.9558573987838637, "support": 1074.0 } }, "roc_auc": 0.98772383181832, "average_precision": 0.9492625541246242 } }, { "model_type": "embedding-lightgbm_sentence_embeddings", "model_name": "lightgbm", "embedding_model_name": "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2", "artifact_dir": "/content/agri-soybeans-classifier/baselines/embedding-lightgbm", "artifact_file": "/content/agri-soybeans-classifier/baselines/embedding-lightgbm/embedding-lightgbm.joblib", "validation_best_threshold": { "threshold": 0.42851684417216673, "f1": 0.8473895582329317, "precision": 0.8577235772357723, "recall": 0.8373015873015873 }, "test_default_0_5": { "threshold": 0.5, "accuracy": 0.9590316573556797, "precision": 0.8938547486033519, "recall": 0.8648648648648649, "f1": 0.8791208791208791, "confusion_matrix": [ [ 870, 19 ], [ 25, 160 ] ], "classification_report": { "NOT_RELEVANT": { "precision": 0.9720670391061452, "recall": 0.9786276715410573, "f1-score": 0.9753363228699552, "support": 889.0 }, "RELEVANT": { "precision": 0.8938547486033519, "recall": 0.8648648648648649, "f1-score": 0.8791208791208791, "support": 185.0 }, "accuracy": 0.9590316573556797, "macro avg": { "precision": 0.9329608938547486, "recall": 0.9217462682029611, "f1-score": 0.9272286009954172, "support": 1074.0 }, "weighted avg": { "precision": 0.9585947171852729, "recall": 0.9590316573556797, "f1-score": 0.9587628991329169, "support": 1074.0 } }, "roc_auc": 0.9852369805125711, "average_precision": 0.9499432525904906 }, "test_optimal_threshold": { "threshold": 0.42851684417216673, "accuracy": 0.9590316573556797, "precision": 0.8895027624309392, "recall": 0.8702702702702703, "f1": 0.8797814207650273, "confusion_matrix": [ [ 869, 20 ], [ 24, 161 ] ], "classification_report": { "NOT_RELEVANT": { "precision": 0.973124300111982, "recall": 0.9775028121484814, "f1-score": 0.9753086419753086, "support": 889.0 }, "RELEVANT": { "precision": 0.8895027624309392, "recall": 0.8702702702702703, "f1-score": 0.8797814207650273, "support": 185.0 }, "accuracy": 0.9590316573556797, "macro avg": { "precision": 0.9313135312714607, "recall": 0.9238865412093759, "f1-score": 0.927545031370168, "support": 1074.0 }, "weighted avg": { "precision": 0.9587202177367556, "recall": 0.9590316573556797, "f1-score": 0.9588537668133889, "support": 1074.0 } }, "roc_auc": 0.9852369805125711, "average_precision": 0.9499432525904906 } }, { "model_type": "transformer", "model_name": "distilbert/distilbert-base-multilingual-cased", "artifact_dir": "/content/agri-soybeans-classifier/transformer", "validation_best_threshold": { "threshold": 0.4926762878894806, "f1": 0.9236790606653621, "precision": 0.9111969111969112, "recall": 0.9365079365079365 }, "test_default_0_5": { "threshold": 0.5, "accuracy": 0.9543761638733705, "precision": 0.8820224719101124, "recall": 0.8486486486486486, "f1": 0.8650137741046832, "confusion_matrix": [ [ 868, 21 ], [ 28, 157 ] ], "classification_report": { "NOT_RELEVANT": { "precision": 0.96875, "recall": 0.9763779527559056, "f1-score": 0.9725490196078431, "support": 889.0 }, "RELEVANT": { "precision": 0.8820224719101124, "recall": 0.8486486486486486, "f1-score": 0.8650137741046832, "support": 185.0 }, "accuracy": 0.9543761638733705, "macro avg": { "precision": 0.9253862359550562, "recall": 0.9125133007022771, "f1-score": 0.9187813968562631, "support": 1074.0 }, "weighted avg": { "precision": 0.9538109006549076, "recall": 0.9543761638733705, "f1-score": 0.9540257231291798, "support": 1074.0 } }, "roc_auc": 0.9756270331073482, "average_precision": 0.9289986926848062 }, "test_optimal_threshold": { "threshold": 0.4926762878894806, "accuracy": 0.9553072625698324, "precision": 0.88268156424581, "recall": 0.8540540540540541, "f1": 0.8681318681318682, "confusion_matrix": [ [ 868, 21 ], [ 27, 158 ] ], "classification_report": { "NOT_RELEVANT": { "precision": 0.9698324022346368, "recall": 0.9763779527559056, "f1-score": 0.9730941704035875, "support": 889.0 }, "RELEVANT": { "precision": 0.88268156424581, "recall": 0.8540540540540541, "f1-score": 0.8681318681318682, "support": 185.0 }, "accuracy": 0.9553072625698324, "macro avg": { "precision": 0.9262569832402234, "recall": 0.9152160034049799, "f1-score": 0.9206130192677278, "support": 1074.0 }, "weighted avg": { "precision": 0.9548203863799507, "recall": 0.9553072625698324, "f1-score": 0.9550140717813639, "support": 1074.0 } }, "roc_auc": 0.9756270331073482, "average_precision": 0.9289986926848062 } } ] }