Text Classification
Transformers
Joblib
Safetensors
multilingual
binary-classification
amis
agriculture
Instructions to use faodl/agri-utilization-classifier with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use faodl/agri-utilization-classifier with Transformers:
```python
# Use a pipeline as a high-level helper
from transformers import pipeline

pipe = pipeline("text-classification", model="faodl/agri-utilization-classifier")

# Load model directly
from transformers import AutoModel

model = AutoModel.from_pretrained("faodl/agri-utilization-classifier", dtype="auto")
```
- Notebooks
- Google Colab
- Kaggle
Upload folder using huggingface_hub
Browse files
This view is limited to 50 files because it contains too many changes. See raw diff
- .gitattributes +6 -0
- README.md +287 -0
- REPORT.md +140 -0
- baselines/embedding-lightgbm/embedding-lightgbm.joblib +3 -0
- baselines/embedding-lightgbm/test_predictions.csv +0 -0
- baselines/embedding-lightgbm/validation_predictions.csv +0 -0
- baselines/embedding-logistic/embedding-logistic.joblib +3 -0
- baselines/embedding-logistic/test_predictions.csv +0 -0
- baselines/embedding-logistic/validation_predictions.csv +0 -0
- baselines/embedding-svm/embedding-svm.joblib +3 -0
- baselines/embedding-svm/test_predictions.csv +0 -0
- baselines/embedding-svm/validation_predictions.csv +0 -0
- baselines/logistic/logistic_tfidf.joblib +3 -0
- baselines/logistic/test_predictions.csv +0 -0
- baselines/logistic/validation_predictions.csv +0 -0
- baselines/xgboost/test_predictions.csv +0 -0
- baselines/xgboost/validation_predictions.csv +0 -0
- baselines/xgboost/xgboost_tfidf.joblib +3 -0
- report.json +704 -0
- transformer/checkpoint-1220/config.json +39 -0
- transformer/checkpoint-1220/model.safetensors +3 -0
- transformer/checkpoint-1220/optimizer.pt +3 -0
- transformer/checkpoint-1220/rng_state.pth +3 -0
- transformer/checkpoint-1220/scaler.pt +3 -0
- transformer/checkpoint-1220/scheduler.pt +3 -0
- transformer/checkpoint-1220/tokenizer.json +3 -0
- transformer/checkpoint-1220/tokenizer_config.json +15 -0
- transformer/checkpoint-1220/trainer_state.json +431 -0
- transformer/checkpoint-1220/training_args.bin +3 -0
- transformer/checkpoint-1525/config.json +39 -0
- transformer/checkpoint-1525/model.safetensors +3 -0
- transformer/checkpoint-1525/optimizer.pt +3 -0
- transformer/checkpoint-1525/rng_state.pth +3 -0
- transformer/checkpoint-1525/scaler.pt +3 -0
- transformer/checkpoint-1525/scheduler.pt +3 -0
- transformer/checkpoint-1525/tokenizer.json +3 -0
- transformer/checkpoint-1525/tokenizer_config.json +15 -0
- transformer/checkpoint-1525/trainer_state.json +535 -0
- transformer/checkpoint-1525/training_args.bin +3 -0
- transformer/checkpoint-305/config.json +39 -0
- transformer/checkpoint-305/model.safetensors +3 -0
- transformer/checkpoint-305/optimizer.pt +3 -0
- transformer/checkpoint-305/rng_state.pth +3 -0
- transformer/checkpoint-305/scaler.pt +3 -0
- transformer/checkpoint-305/scheduler.pt +3 -0
- transformer/checkpoint-305/tokenizer.json +3 -0
- transformer/checkpoint-305/tokenizer_config.json +15 -0
- transformer/checkpoint-305/trainer_state.json +140 -0
- transformer/checkpoint-305/training_args.bin +3 -0
- transformer/checkpoint-610/config.json +39 -0
.gitattributes
CHANGED
|
@@ -33,3 +33,9 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 36 |
+
transformer/checkpoint-1220/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 37 |
+
transformer/checkpoint-1525/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 38 |
+
transformer/checkpoint-305/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 39 |
+
transformer/checkpoint-610/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 40 |
+
transformer/checkpoint-915/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 41 |
+
transformer/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
README.md
ADDED
|
@@ -0,0 +1,287 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
---
|
| 2 |
+
library_name: transformers
|
| 3 |
+
pipeline_tag: text-classification
|
| 4 |
+
base_model: FacebookAI/xlm-roberta-base
|
| 5 |
+
tags:
|
| 6 |
+
- text-classification
|
| 7 |
+
- binary-classification
|
| 8 |
+
- amis
|
| 9 |
+
- agriculture
|
| 10 |
+
language: multilingual
|
| 11 |
+
---
|
| 12 |
+
|
| 13 |
+
# AMIS Commodity Classifier
|
| 14 |
+
|
| 15 |
+
This model repository contains artifacts from an AMIS commodity relevance classifier training run.
|
| 16 |
+
It includes the Transformer model, any configured TF-IDF or sentence-embedding baselines, prediction files, and the training report.
|
| 17 |
+
|
| 18 |
+
- Dataset: `faodl/amis-agri-utilization`
|
| 19 |
+
- Dataset subset: _none_ (no subset configured)
|
| 20 |
+
- Text column: `chunk_text`
|
| 21 |
+
- Label column: `label`
|
| 22 |
+
- Transformer: `FacebookAI/xlm-roberta-base`
|
| 23 |
+
- Generated at: `2026-05-25T19:23:29.605062+00:00`
|
| 24 |
+
|
| 25 |
+
## Dataset Summary
|
| 26 |
+
|
| 27 |
+
| Split | Rows | Label 0 | Label 1 | Unique groups | Mean text length |
|
| 28 |
+
| --- | ---: | ---: | ---: | ---: | ---: |
|
| 29 |
+
| train | 4877 | 4347 | 530 | 2513 | 696.6 |
|
| 30 |
+
| validation | 978 | 899 | 79 | 538 | 690.6 |
|
| 31 |
+
| test | 1016 | 904 | 112 | 539 | 690.7 |
|
| 32 |
+
|
| 33 |
+
## Threshold Comparison on Test Split
|
| 34 |
+
|
| 35 |
+
| Model | Threshold | Accuracy | Precision | Recall | F1 | ROC AUC | Average precision |
|
| 36 |
+
| --- | ---: | ---: | ---: | ---: | ---: | ---: | ---: |
|
| 37 |
+
| logistic_tfidf | 0.500 | 0.926 | 0.691 | 0.598 | 0.641 | 0.899 | 0.726 |
|
| 38 |
+
| logistic_tfidf | 0.608 | 0.930 | 0.902 | 0.411 | 0.564 | 0.899 | 0.726 |
|
| 39 |
+
| xgboost_tfidf | 0.500 | 0.924 | 1.000 | 0.312 | 0.476 | 0.892 | 0.692 |
|
| 40 |
+
| xgboost_tfidf | 0.177 | 0.918 | 0.663 | 0.527 | 0.587 | 0.892 | 0.692 |
|
| 41 |
+
| embedding-logistic_sentence_embeddings | 0.500 | 0.899 | 0.524 | 0.866 | 0.653 | 0.952 | 0.759 |
|
| 42 |
+
| embedding-logistic_sentence_embeddings | 0.616 | 0.929 | 0.632 | 0.857 | 0.727 | 0.952 | 0.759 |
|
| 43 |
+
| embedding-svm_sentence_embeddings | 0.500 | 0.941 | 0.771 | 0.661 | 0.712 | 0.952 | 0.743 |
|
| 44 |
+
| embedding-svm_sentence_embeddings | 0.276 | 0.935 | 0.667 | 0.821 | 0.736 | 0.952 | 0.743 |
|
| 45 |
+
| embedding-lightgbm_sentence_embeddings | 0.500 | 0.946 | 0.788 | 0.696 | 0.739 | 0.959 | 0.801 |
|
| 46 |
+
| embedding-lightgbm_sentence_embeddings | 0.052 | 0.933 | 0.657 | 0.821 | 0.730 | 0.959 | 0.801 |
|
| 47 |
+
| transformer | 0.500 | 0.950 | 0.748 | 0.821 | 0.783 | 0.951 | 0.785 |
|
| 48 |
+
| transformer | 0.616 | 0.950 | 0.748 | 0.821 | 0.783 | 0.951 | 0.785 |
|
| 49 |
+
|
| 50 |
+
## Confusion Matrices on Test Split
|
| 51 |
+
|
| 52 |
+
Rows are true labels and columns are predicted labels.
|
| 53 |
+
|
| 54 |
+
### logistic_tfidf at threshold 0.500
|
| 55 |
+
|
| 56 |
+
| True / Predicted | NOT_RELEVANT | RELEVANT |
|
| 57 |
+
| --- | ---: | ---: |
|
| 58 |
+
| NOT_RELEVANT | 874 | 30 |
|
| 59 |
+
| RELEVANT | 45 | 67 |
|
| 60 |
+
|
| 61 |
+
### logistic_tfidf at threshold 0.608
|
| 62 |
+
|
| 63 |
+
| True / Predicted | NOT_RELEVANT | RELEVANT |
|
| 64 |
+
| --- | ---: | ---: |
|
| 65 |
+
| NOT_RELEVANT | 899 | 5 |
|
| 66 |
+
| RELEVANT | 66 | 46 |
|
| 67 |
+
|
| 68 |
+
### xgboost_tfidf at threshold 0.500
|
| 69 |
+
|
| 70 |
+
| True / Predicted | NOT_RELEVANT | RELEVANT |
|
| 71 |
+
| --- | ---: | ---: |
|
| 72 |
+
| NOT_RELEVANT | 904 | 0 |
|
| 73 |
+
| RELEVANT | 77 | 35 |
|
| 74 |
+
|
| 75 |
+
### xgboost_tfidf at threshold 0.177
|
| 76 |
+
|
| 77 |
+
| True / Predicted | NOT_RELEVANT | RELEVANT |
|
| 78 |
+
| --- | ---: | ---: |
|
| 79 |
+
| NOT_RELEVANT | 874 | 30 |
|
| 80 |
+
| RELEVANT | 53 | 59 |
|
| 81 |
+
|
| 82 |
+
### embedding-logistic_sentence_embeddings at threshold 0.500
|
| 83 |
+
|
| 84 |
+
| True / Predicted | NOT_RELEVANT | RELEVANT |
|
| 85 |
+
| --- | ---: | ---: |
|
| 86 |
+
| NOT_RELEVANT | 816 | 88 |
|
| 87 |
+
| RELEVANT | 15 | 97 |
|
| 88 |
+
|
| 89 |
+
### embedding-logistic_sentence_embeddings at threshold 0.616
|
| 90 |
+
|
| 91 |
+
| True / Predicted | NOT_RELEVANT | RELEVANT |
|
| 92 |
+
| --- | ---: | ---: |
|
| 93 |
+
| NOT_RELEVANT | 848 | 56 |
|
| 94 |
+
| RELEVANT | 16 | 96 |
|
| 95 |
+
|
| 96 |
+
### embedding-svm_sentence_embeddings at threshold 0.500
|
| 97 |
+
|
| 98 |
+
| True / Predicted | NOT_RELEVANT | RELEVANT |
|
| 99 |
+
| --- | ---: | ---: |
|
| 100 |
+
| NOT_RELEVANT | 882 | 22 |
|
| 101 |
+
| RELEVANT | 38 | 74 |
|
| 102 |
+
|
| 103 |
+
### embedding-svm_sentence_embeddings at threshold 0.276
|
| 104 |
+
|
| 105 |
+
| True / Predicted | NOT_RELEVANT | RELEVANT |
|
| 106 |
+
| --- | ---: | ---: |
|
| 107 |
+
| NOT_RELEVANT | 858 | 46 |
|
| 108 |
+
| RELEVANT | 20 | 92 |
|
| 109 |
+
|
| 110 |
+
### embedding-lightgbm_sentence_embeddings at threshold 0.500
|
| 111 |
+
|
| 112 |
+
| True / Predicted | NOT_RELEVANT | RELEVANT |
|
| 113 |
+
| --- | ---: | ---: |
|
| 114 |
+
| NOT_RELEVANT | 883 | 21 |
|
| 115 |
+
| RELEVANT | 34 | 78 |
|
| 116 |
+
|
| 117 |
+
### embedding-lightgbm_sentence_embeddings at threshold 0.052
|
| 118 |
+
|
| 119 |
+
| True / Predicted | NOT_RELEVANT | RELEVANT |
|
| 120 |
+
| --- | ---: | ---: |
|
| 121 |
+
| NOT_RELEVANT | 856 | 48 |
|
| 122 |
+
| RELEVANT | 20 | 92 |
|
| 123 |
+
|
| 124 |
+
### transformer at threshold 0.500
|
| 125 |
+
|
| 126 |
+
| True / Predicted | NOT_RELEVANT | RELEVANT |
|
| 127 |
+
| --- | ---: | ---: |
|
| 128 |
+
| NOT_RELEVANT | 873 | 31 |
|
| 129 |
+
| RELEVANT | 20 | 92 |
|
| 130 |
+
|
| 131 |
+
### transformer at threshold 0.616
|
| 132 |
+
|
| 133 |
+
| True / Predicted | NOT_RELEVANT | RELEVANT |
|
| 134 |
+
| --- | ---: | ---: |
|
| 135 |
+
| NOT_RELEVANT | 873 | 31 |
|
| 136 |
+
| RELEVANT | 20 | 92 |
|
| 137 |
+
|
| 138 |
+
|
| 139 |
+
## Validation-Tuned Thresholds
|
| 140 |
+
|
| 141 |
+
- `logistic_tfidf`: threshold `0.608` (validation F1 `0.578`); test F1 change vs 0.5: `-0.077`.
|
| 142 |
+
- `xgboost_tfidf`: threshold `0.177` (validation F1 `0.581`); test F1 change vs 0.5: `+0.111`.
|
| 143 |
+
- `embedding-logistic_sentence_embeddings`: threshold `0.616` (validation F1 `0.728`); test F1 change vs 0.5: `+0.074`.
|
| 144 |
+
- `embedding-svm_sentence_embeddings`: threshold `0.276` (validation F1 `0.731`); test F1 change vs 0.5: `+0.024`.
|
| 145 |
+
- `embedding-lightgbm_sentence_embeddings`: threshold `0.052` (validation F1 `0.739`); test F1 change vs 0.5: `-0.009`.
|
| 146 |
+
- `transformer`: threshold `0.616` (validation F1 `0.807`); test F1 change vs 0.5: `+0.000`.
|
| 147 |
+
|
| 148 |
+
## Artifacts
|
| 149 |
+
|
| 150 |
+
- `logistic_tfidf`: `/content/agri-utilization-classifier/baselines/logistic`
|
| 151 |
+
- `xgboost_tfidf`: `/content/agri-utilization-classifier/baselines/xgboost`
|
| 152 |
+
- `embedding-logistic_sentence_embeddings`: `/content/agri-utilization-classifier/baselines/embedding-logistic`
|
| 153 |
+
- `embedding-svm_sentence_embeddings`: `/content/agri-utilization-classifier/baselines/embedding-svm`
|
| 154 |
+
- `embedding-lightgbm_sentence_embeddings`: `/content/agri-utilization-classifier/baselines/embedding-lightgbm`
|
| 155 |
+
- `transformer`: `/content/agri-utilization-classifier/transformer`
|
| 156 |
+
|
| 157 |
+
## Inference
|
| 158 |
+
|
| 159 |
+
Install the runtime dependencies:
|
| 160 |
+
|
| 161 |
+
```bash
|
| 162 |
+
pip install transformers torch huggingface_hub pandas joblib scikit-learn xgboost sentence-transformers lightgbm
|
| 163 |
+
```
|
| 164 |
+
|
| 165 |
+
### Transformer
|
| 166 |
+
|
| 167 |
+
```python
|
| 168 |
+
import torch
|
| 169 |
+
from transformers import AutoModelForSequenceClassification, AutoTokenizer
|
| 170 |
+
|
| 171 |
+
MODEL_ID = "faodl/agri-utilization-classifier"
|
| 172 |
+
|
| 173 |
+
texts = [
|
| 174 |
+
"Rice export prices increased after new procurement rules were announced.",
|
| 175 |
+
"The finance ministry released its monthly fuel tax bulletin.",
|
| 176 |
+
]
|
| 177 |
+
|
| 178 |
+
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, subfolder="transformer")
|
| 179 |
+
model = AutoModelForSequenceClassification.from_pretrained(MODEL_ID, subfolder="transformer")
|
| 180 |
+
threshold = float(getattr(model.config, "threshold", 0.5))
|
| 181 |
+
|
| 182 |
+
encoded = tokenizer(
|
| 183 |
+
texts,
|
| 184 |
+
truncation=True,
|
| 185 |
+
padding=True,
|
| 186 |
+
max_length=256,
|
| 187 |
+
return_tensors="pt",
|
| 188 |
+
)
|
| 189 |
+
|
| 190 |
+
with torch.no_grad():
|
| 191 |
+
logits = model(**encoded).logits
|
| 192 |
+
probabilities = torch.softmax(logits, dim=-1)[:, 1].tolist()
|
| 193 |
+
|
| 194 |
+
for text, probability in zip(texts, probabilities):
|
| 195 |
+
label = model.config.id2label[int(probability >= threshold)]
|
| 196 |
+
print({"text": text, "probability_positive": probability, "label": label})
|
| 197 |
+
```
|
| 198 |
+
|
| 199 |
+
### TF-IDF Baselines
|
| 200 |
+
|
| 201 |
+
Available baseline names in this run: "logistic", "xgboost".
|
| 202 |
+
|
| 203 |
+
```python
|
| 204 |
+
import json
|
| 205 |
+
import joblib
|
| 206 |
+
from huggingface_hub import hf_hub_download
|
| 207 |
+
|
| 208 |
+
MODEL_ID = "faodl/agri-utilization-classifier"
|
| 209 |
+
BASELINE = "logistic"
|
| 210 |
+
|
| 211 |
+
texts = [
|
| 212 |
+
"Maize production forecasts were revised after delayed rains.",
|
| 213 |
+
"The central bank published new exchange rate statistics.",
|
| 214 |
+
]
|
| 215 |
+
|
| 216 |
+
model_path = hf_hub_download(
|
| 217 |
+
repo_id=MODEL_ID,
|
| 218 |
+
repo_type="model",
|
| 219 |
+
filename=f"baselines/{BASELINE}/{BASELINE}_tfidf.joblib",
|
| 220 |
+
)
|
| 221 |
+
report_path = hf_hub_download(
|
| 222 |
+
repo_id=MODEL_ID,
|
| 223 |
+
repo_type="model",
|
| 224 |
+
filename="report.json",
|
| 225 |
+
)
|
| 226 |
+
|
| 227 |
+
pipeline = joblib.load(model_path)
|
| 228 |
+
with open(report_path, encoding="utf-8") as handle:
|
| 229 |
+
report = json.load(handle)
|
| 230 |
+
|
| 231 |
+
threshold = next(
|
| 232 |
+
result["validation_best_threshold"]["threshold"]
|
| 233 |
+
for result in report["results"]
|
| 234 |
+
if result["model_type"] == f"{BASELINE}_tfidf"
|
| 235 |
+
)
|
| 236 |
+
|
| 237 |
+
probabilities = pipeline.predict_proba(texts)[:, 1]
|
| 238 |
+
for text, probability in zip(texts, probabilities):
|
| 239 |
+
label = "RELEVANT" if probability >= threshold else "NOT_RELEVANT"
|
| 240 |
+
print({"text": text, "probability_positive": float(probability), "label": label})
|
| 241 |
+
```
|
| 242 |
+
|
| 243 |
+
### Sentence-Embedding Baselines
|
| 244 |
+
|
| 245 |
+
Available embedding baseline names in this run: "embedding-logistic", "embedding-svm", "embedding-lightgbm".
|
| 246 |
+
|
| 247 |
+
```python
|
| 248 |
+
import joblib
|
| 249 |
+
from huggingface_hub import hf_hub_download
|
| 250 |
+
from sentence_transformers import SentenceTransformer
|
| 251 |
+
|
| 252 |
+
MODEL_ID = "faodl/agri-utilization-classifier"
|
| 253 |
+
BASELINE = "embedding-logistic"
|
| 254 |
+
|
| 255 |
+
texts = [
|
| 256 |
+
"Wheat export inspections rose as demand from importers increased.",
|
| 257 |
+
"The sports ministry announced a new stadium renovation plan.",
|
| 258 |
+
]
|
| 259 |
+
|
| 260 |
+
model_path = hf_hub_download(
|
| 261 |
+
repo_id=MODEL_ID,
|
| 262 |
+
repo_type="model",
|
| 263 |
+
filename=f"baselines/{BASELINE}/{BASELINE}.joblib",
|
| 264 |
+
)
|
| 265 |
+
artifact = joblib.load(model_path)
|
| 266 |
+
embedding_model = SentenceTransformer(artifact["embedding_model_name"])
|
| 267 |
+
embeddings = embedding_model.encode(
|
| 268 |
+
texts,
|
| 269 |
+
batch_size=artifact.get("embedding_batch_size", 64),
|
| 270 |
+
convert_to_numpy=True,
|
| 271 |
+
normalize_embeddings=artifact.get("normalize_embeddings", True),
|
| 272 |
+
)
|
| 273 |
+
probabilities = artifact["classifier"].predict_proba(embeddings)[:, 1]
|
| 274 |
+
threshold = artifact["validation_best_threshold"]["threshold"]
|
| 275 |
+
|
| 276 |
+
for text, probability in zip(texts, probabilities):
|
| 277 |
+
label = "RELEVANT" if probability >= threshold else "NOT_RELEVANT"
|
| 278 |
+
print({"text": text, "probability_positive": float(probability), "label": label})
|
| 279 |
+
```
|
| 280 |
+
|
| 281 |
+
## Files
|
| 282 |
+
|
| 283 |
+
- `REPORT.md`: Markdown report for this training run.
|
| 284 |
+
- `report.json`: Machine-readable report containing metrics and thresholds.
|
| 285 |
+
- `transformer/`: Fine-tuned Transformer artifacts, when Transformer training is enabled.
|
| 286 |
+
- `baselines/`: TF-IDF and sentence-embedding baseline artifacts, when baseline training is enabled.
|
| 287 |
+
- `*/validation_predictions.csv` and `*/test_predictions.csv`: Split-level predictions.
|
REPORT.md
ADDED
|
@@ -0,0 +1,140 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# AMIS Commodity Classifier Training Report
|
| 2 |
+
|
| 3 |
+
- Dataset: `faodl/amis-agri-utilization`
|
| 4 |
+
- Dataset subset: _none_ (no subset configured)
|
| 5 |
+
- Text column: `chunk_text`
|
| 6 |
+
- Label column: `label`
|
| 7 |
+
- Transformer: `FacebookAI/xlm-roberta-base`
|
| 8 |
+
- Generated at: `2026-05-25T19:23:29.605062+00:00`
|
| 9 |
+
|
| 10 |
+
## Dataset Summary
|
| 11 |
+
|
| 12 |
+
| Split | Rows | Label 0 | Label 1 | Unique groups | Mean text length |
|
| 13 |
+
| --- | ---: | ---: | ---: | ---: | ---: |
|
| 14 |
+
| train | 4877 | 4347 | 530 | 2513 | 696.6 |
|
| 15 |
+
| validation | 978 | 899 | 79 | 538 | 690.6 |
|
| 16 |
+
| test | 1016 | 904 | 112 | 539 | 690.7 |
|
| 17 |
+
|
| 18 |
+
## Threshold Comparison on Test Split
|
| 19 |
+
|
| 20 |
+
| Model | Threshold | Accuracy | Precision | Recall | F1 | ROC AUC | Average precision |
|
| 21 |
+
| --- | ---: | ---: | ---: | ---: | ---: | ---: | ---: |
|
| 22 |
+
| logistic_tfidf | 0.500 | 0.926 | 0.691 | 0.598 | 0.641 | 0.899 | 0.726 |
|
| 23 |
+
| logistic_tfidf | 0.608 | 0.930 | 0.902 | 0.411 | 0.564 | 0.899 | 0.726 |
|
| 24 |
+
| xgboost_tfidf | 0.500 | 0.924 | 1.000 | 0.312 | 0.476 | 0.892 | 0.692 |
|
| 25 |
+
| xgboost_tfidf | 0.177 | 0.918 | 0.663 | 0.527 | 0.587 | 0.892 | 0.692 |
|
| 26 |
+
| embedding-logistic_sentence_embeddings | 0.500 | 0.899 | 0.524 | 0.866 | 0.653 | 0.952 | 0.759 |
|
| 27 |
+
| embedding-logistic_sentence_embeddings | 0.616 | 0.929 | 0.632 | 0.857 | 0.727 | 0.952 | 0.759 |
|
| 28 |
+
| embedding-svm_sentence_embeddings | 0.500 | 0.941 | 0.771 | 0.661 | 0.712 | 0.952 | 0.743 |
|
| 29 |
+
| embedding-svm_sentence_embeddings | 0.276 | 0.935 | 0.667 | 0.821 | 0.736 | 0.952 | 0.743 |
|
| 30 |
+
| embedding-lightgbm_sentence_embeddings | 0.500 | 0.946 | 0.788 | 0.696 | 0.739 | 0.959 | 0.801 |
|
| 31 |
+
| embedding-lightgbm_sentence_embeddings | 0.052 | 0.933 | 0.657 | 0.821 | 0.730 | 0.959 | 0.801 |
|
| 32 |
+
| transformer | 0.500 | 0.950 | 0.748 | 0.821 | 0.783 | 0.951 | 0.785 |
|
| 33 |
+
| transformer | 0.616 | 0.950 | 0.748 | 0.821 | 0.783 | 0.951 | 0.785 |
|
| 34 |
+
|
| 35 |
+
## Confusion Matrices on Test Split
|
| 36 |
+
|
| 37 |
+
Rows are true labels and columns are predicted labels.
|
| 38 |
+
|
| 39 |
+
### logistic_tfidf at threshold 0.500
|
| 40 |
+
|
| 41 |
+
| True / Predicted | NOT_RELEVANT | RELEVANT |
|
| 42 |
+
| --- | ---: | ---: |
|
| 43 |
+
| NOT_RELEVANT | 874 | 30 |
|
| 44 |
+
| RELEVANT | 45 | 67 |
|
| 45 |
+
|
| 46 |
+
### logistic_tfidf at threshold 0.608
|
| 47 |
+
|
| 48 |
+
| True / Predicted | NOT_RELEVANT | RELEVANT |
|
| 49 |
+
| --- | ---: | ---: |
|
| 50 |
+
| NOT_RELEVANT | 899 | 5 |
|
| 51 |
+
| RELEVANT | 66 | 46 |
|
| 52 |
+
|
| 53 |
+
### xgboost_tfidf at threshold 0.500
|
| 54 |
+
|
| 55 |
+
| True / Predicted | NOT_RELEVANT | RELEVANT |
|
| 56 |
+
| --- | ---: | ---: |
|
| 57 |
+
| NOT_RELEVANT | 904 | 0 |
|
| 58 |
+
| RELEVANT | 77 | 35 |
|
| 59 |
+
|
| 60 |
+
### xgboost_tfidf at threshold 0.177
|
| 61 |
+
|
| 62 |
+
| True / Predicted | NOT_RELEVANT | RELEVANT |
|
| 63 |
+
| --- | ---: | ---: |
|
| 64 |
+
| NOT_RELEVANT | 874 | 30 |
|
| 65 |
+
| RELEVANT | 53 | 59 |
|
| 66 |
+
|
| 67 |
+
### embedding-logistic_sentence_embeddings at threshold 0.500
|
| 68 |
+
|
| 69 |
+
| True / Predicted | NOT_RELEVANT | RELEVANT |
|
| 70 |
+
| --- | ---: | ---: |
|
| 71 |
+
| NOT_RELEVANT | 816 | 88 |
|
| 72 |
+
| RELEVANT | 15 | 97 |
|
| 73 |
+
|
| 74 |
+
### embedding-logistic_sentence_embeddings at threshold 0.616
|
| 75 |
+
|
| 76 |
+
| True / Predicted | NOT_RELEVANT | RELEVANT |
|
| 77 |
+
| --- | ---: | ---: |
|
| 78 |
+
| NOT_RELEVANT | 848 | 56 |
|
| 79 |
+
| RELEVANT | 16 | 96 |
|
| 80 |
+
|
| 81 |
+
### embedding-svm_sentence_embeddings at threshold 0.500
|
| 82 |
+
|
| 83 |
+
| True / Predicted | NOT_RELEVANT | RELEVANT |
|
| 84 |
+
| --- | ---: | ---: |
|
| 85 |
+
| NOT_RELEVANT | 882 | 22 |
|
| 86 |
+
| RELEVANT | 38 | 74 |
|
| 87 |
+
|
| 88 |
+
### embedding-svm_sentence_embeddings at threshold 0.276
|
| 89 |
+
|
| 90 |
+
| True / Predicted | NOT_RELEVANT | RELEVANT |
|
| 91 |
+
| --- | ---: | ---: |
|
| 92 |
+
| NOT_RELEVANT | 858 | 46 |
|
| 93 |
+
| RELEVANT | 20 | 92 |
|
| 94 |
+
|
| 95 |
+
### embedding-lightgbm_sentence_embeddings at threshold 0.500
|
| 96 |
+
|
| 97 |
+
| True / Predicted | NOT_RELEVANT | RELEVANT |
|
| 98 |
+
| --- | ---: | ---: |
|
| 99 |
+
| NOT_RELEVANT | 883 | 21 |
|
| 100 |
+
| RELEVANT | 34 | 78 |
|
| 101 |
+
|
| 102 |
+
### embedding-lightgbm_sentence_embeddings at threshold 0.052
|
| 103 |
+
|
| 104 |
+
| True / Predicted | NOT_RELEVANT | RELEVANT |
|
| 105 |
+
| --- | ---: | ---: |
|
| 106 |
+
| NOT_RELEVANT | 856 | 48 |
|
| 107 |
+
| RELEVANT | 20 | 92 |
|
| 108 |
+
|
| 109 |
+
### transformer at threshold 0.500
|
| 110 |
+
|
| 111 |
+
| True / Predicted | NOT_RELEVANT | RELEVANT |
|
| 112 |
+
| --- | ---: | ---: |
|
| 113 |
+
| NOT_RELEVANT | 873 | 31 |
|
| 114 |
+
| RELEVANT | 20 | 92 |
|
| 115 |
+
|
| 116 |
+
### transformer at threshold 0.616
|
| 117 |
+
|
| 118 |
+
| True / Predicted | NOT_RELEVANT | RELEVANT |
|
| 119 |
+
| --- | ---: | ---: |
|
| 120 |
+
| NOT_RELEVANT | 873 | 31 |
|
| 121 |
+
| RELEVANT | 20 | 92 |
|
| 122 |
+
|
| 123 |
+
|
| 124 |
+
## Validation-Tuned Thresholds
|
| 125 |
+
|
| 126 |
+
- `logistic_tfidf`: threshold `0.608` (validation F1 `0.578`); test F1 change vs 0.5: `-0.077`.
|
| 127 |
+
- `xgboost_tfidf`: threshold `0.177` (validation F1 `0.581`); test F1 change vs 0.5: `+0.111`.
|
| 128 |
+
- `embedding-logistic_sentence_embeddings`: threshold `0.616` (validation F1 `0.728`); test F1 change vs 0.5: `+0.074`.
|
| 129 |
+
- `embedding-svm_sentence_embeddings`: threshold `0.276` (validation F1 `0.731`); test F1 change vs 0.5: `+0.024`.
|
| 130 |
+
- `embedding-lightgbm_sentence_embeddings`: threshold `0.052` (validation F1 `0.739`); test F1 change vs 0.5: `-0.009`.
|
| 131 |
+
- `transformer`: threshold `0.616` (validation F1 `0.807`); test F1 change vs 0.5: `+0.000`.
|
| 132 |
+
|
| 133 |
+
## Artifacts
|
| 134 |
+
|
| 135 |
+
- `logistic_tfidf`: `/content/agri-utilization-classifier/baselines/logistic`
|
| 136 |
+
- `xgboost_tfidf`: `/content/agri-utilization-classifier/baselines/xgboost`
|
| 137 |
+
- `embedding-logistic_sentence_embeddings`: `/content/agri-utilization-classifier/baselines/embedding-logistic`
|
| 138 |
+
- `embedding-svm_sentence_embeddings`: `/content/agri-utilization-classifier/baselines/embedding-svm`
|
| 139 |
+
- `embedding-lightgbm_sentence_embeddings`: `/content/agri-utilization-classifier/baselines/embedding-lightgbm`
|
| 140 |
+
- `transformer`: `/content/agri-utilization-classifier/transformer`
|
baselines/embedding-lightgbm/embedding-lightgbm.joblib
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3a14be333902e726d49155cf98ec689843edfa4320b39724da54a187bea078e8
|
| 3 |
+
size 1467460
|
baselines/embedding-lightgbm/test_predictions.csv
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
baselines/embedding-lightgbm/validation_predictions.csv
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
baselines/embedding-logistic/embedding-logistic.joblib
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:596282c69402bd7479f4057afeaeeec5cc81d9c13bede61569f3be96207798f0
|
| 3 |
+
size 4287
|
baselines/embedding-logistic/test_predictions.csv
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
baselines/embedding-logistic/validation_predictions.csv
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
baselines/embedding-svm/embedding-svm.joblib
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d4dcb68c9d78767b36ec44c943e7085a53ccbf4fc61e5568acaf2d3cf442f72e
|
| 3 |
+
size 11696
|
baselines/embedding-svm/test_predictions.csv
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
baselines/embedding-svm/validation_predictions.csv
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
baselines/logistic/logistic_tfidf.joblib
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:988b232ccc0c55fa1116c0885058e6200246e9dbe050debf6f5edfa81e0438e7
|
| 3 |
+
size 2452308
|
baselines/logistic/test_predictions.csv
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
baselines/logistic/validation_predictions.csv
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
baselines/xgboost/test_predictions.csv
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
baselines/xgboost/validation_predictions.csv
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
baselines/xgboost/xgboost_tfidf.joblib
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:75dae90ae561b6e87b2fd736393208127db3493eb3df7a2232490a3a60238d1b
|
| 3 |
+
size 2494551
|
report.json
ADDED
|
@@ -0,0 +1,704 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"created_at": "2026-05-25T19:23:29.605062+00:00",
|
| 3 |
+
"config": {
|
| 4 |
+
"hf_dataset": "faodl/amis-agri-utilization",
|
| 5 |
+
"hf_subset": null,
|
| 6 |
+
"train_split": "train",
|
| 7 |
+
"validation_split": "validation",
|
| 8 |
+
"test_split": "test",
|
| 9 |
+
"text_col": "chunk_text",
|
| 10 |
+
"label_col": "label",
|
| 11 |
+
"group_col": "id",
|
| 12 |
+
"id_col": "chunk_id",
|
| 13 |
+
"model_name": "FacebookAI/xlm-roberta-base",
|
| 14 |
+
"output_dir": "/content/agri-utilization-classifier",
|
| 15 |
+
"max_length": 256,
|
| 16 |
+
"learning_rate": 2e-05,
|
| 17 |
+
"weight_decay": 0.01,
|
| 18 |
+
"num_train_epochs": 5.0,
|
| 19 |
+
"per_device_train_batch_size": 16,
|
| 20 |
+
"per_device_eval_batch_size": 32,
|
| 21 |
+
"gradient_accumulation_steps": 1,
|
| 22 |
+
"warmup_ratio": 0.1,
|
| 23 |
+
"early_stopping_patience": 2,
|
| 24 |
+
"seed": 42,
|
| 25 |
+
"metric_for_best_model": "f1",
|
| 26 |
+
"skip_transformer": false,
|
| 27 |
+
"skip_baselines": false,
|
| 28 |
+
"baseline_models": [
|
| 29 |
+
"logistic",
|
| 30 |
+
"xgboost",
|
| 31 |
+
"embedding-logistic",
|
| 32 |
+
"embedding-svm",
|
| 33 |
+
"embedding-lightgbm"
|
| 34 |
+
],
|
| 35 |
+
"tfidf_max_features": 50000,
|
| 36 |
+
"embedding_model_name": "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2",
|
| 37 |
+
"embedding_batch_size": 64,
|
| 38 |
+
"positive_label_name": "RELEVANT",
|
| 39 |
+
"negative_label_name": "NOT_RELEVANT",
|
| 40 |
+
"push_to_hub": true,
|
| 41 |
+
"hub_model_id": "faodl/agri-utilization-classifier",
|
| 42 |
+
"hub_private_repo": false
|
| 43 |
+
},
|
| 44 |
+
"dataset_summary": {
|
| 45 |
+
"train": {
|
| 46 |
+
"rows": 4877,
|
| 47 |
+
"labels": {
|
| 48 |
+
"0": 4347,
|
| 49 |
+
"1": 530
|
| 50 |
+
},
|
| 51 |
+
"unique_groups": 2513,
|
| 52 |
+
"text_length_mean": 696.6221037523068,
|
| 53 |
+
"text_length_median": 794.0
|
| 54 |
+
},
|
| 55 |
+
"validation": {
|
| 56 |
+
"rows": 978,
|
| 57 |
+
"labels": {
|
| 58 |
+
"0": 899,
|
| 59 |
+
"1": 79
|
| 60 |
+
},
|
| 61 |
+
"unique_groups": 538,
|
| 62 |
+
"text_length_mean": 690.6196319018405,
|
| 63 |
+
"text_length_median": 794.0
|
| 64 |
+
},
|
| 65 |
+
"test": {
|
| 66 |
+
"rows": 1016,
|
| 67 |
+
"labels": {
|
| 68 |
+
"0": 904,
|
| 69 |
+
"1": 112
|
| 70 |
+
},
|
| 71 |
+
"unique_groups": 539,
|
| 72 |
+
"text_length_mean": 690.6929133858267,
|
| 73 |
+
"text_length_median": 794.0
|
| 74 |
+
}
|
| 75 |
+
},
|
| 76 |
+
"results": [
|
| 77 |
+
{
|
| 78 |
+
"model_type": "logistic_tfidf",
|
| 79 |
+
"model_name": "logistic",
|
| 80 |
+
"artifact_dir": "/content/agri-utilization-classifier/baselines/logistic",
|
| 81 |
+
"artifact_file": "/content/agri-utilization-classifier/baselines/logistic/logistic_tfidf.joblib",
|
| 82 |
+
"validation_best_threshold": {
|
| 83 |
+
"threshold": 0.6076606929552563,
|
| 84 |
+
"f1": 0.5777777777777778,
|
| 85 |
+
"precision": 0.6964285714285714,
|
| 86 |
+
"recall": 0.4936708860759494
|
| 87 |
+
},
|
| 88 |
+
"test_default_0_5": {
|
| 89 |
+
"threshold": 0.5,
|
| 90 |
+
"accuracy": 0.9261811023622047,
|
| 91 |
+
"precision": 0.6907216494845361,
|
| 92 |
+
"recall": 0.5982142857142857,
|
| 93 |
+
"f1": 0.6411483253588517,
|
| 94 |
+
"confusion_matrix": [
|
| 95 |
+
[
|
| 96 |
+
874,
|
| 97 |
+
30
|
| 98 |
+
],
|
| 99 |
+
[
|
| 100 |
+
45,
|
| 101 |
+
67
|
| 102 |
+
]
|
| 103 |
+
],
|
| 104 |
+
"classification_report": {
|
| 105 |
+
"NOT_RELEVANT": {
|
| 106 |
+
"precision": 0.9510337323177367,
|
| 107 |
+
"recall": 0.9668141592920354,
|
| 108 |
+
"f1-score": 0.9588590235874932,
|
| 109 |
+
"support": 904.0
|
| 110 |
+
},
|
| 111 |
+
"RELEVANT": {
|
| 112 |
+
"precision": 0.6907216494845361,
|
| 113 |
+
"recall": 0.5982142857142857,
|
| 114 |
+
"f1-score": 0.6411483253588517,
|
| 115 |
+
"support": 112.0
|
| 116 |
+
},
|
| 117 |
+
"accuracy": 0.9261811023622047,
|
| 118 |
+
"macro avg": {
|
| 119 |
+
"precision": 0.8208776909011364,
|
| 120 |
+
"recall": 0.7825142225031605,
|
| 121 |
+
"f1-score": 0.8000036744731724,
|
| 122 |
+
"support": 1016.0
|
| 123 |
+
},
|
| 124 |
+
"weighted avg": {
|
| 125 |
+
"precision": 0.9223379121628957,
|
| 126 |
+
"recall": 0.9261811023622047,
|
| 127 |
+
"f1-score": 0.9238357970111075,
|
| 128 |
+
"support": 1016.0
|
| 129 |
+
}
|
| 130 |
+
},
|
| 131 |
+
"roc_auc": 0.8990004740834386,
|
| 132 |
+
"average_precision": 0.7262348311700503
|
| 133 |
+
},
|
| 134 |
+
"test_optimal_threshold": {
|
| 135 |
+
"threshold": 0.6076606929552563,
|
| 136 |
+
"accuracy": 0.9301181102362205,
|
| 137 |
+
"precision": 0.9019607843137255,
|
| 138 |
+
"recall": 0.4107142857142857,
|
| 139 |
+
"f1": 0.5644171779141104,
|
| 140 |
+
"confusion_matrix": [
|
| 141 |
+
[
|
| 142 |
+
899,
|
| 143 |
+
5
|
| 144 |
+
],
|
| 145 |
+
[
|
| 146 |
+
66,
|
| 147 |
+
46
|
| 148 |
+
]
|
| 149 |
+
],
|
| 150 |
+
"classification_report": {
|
| 151 |
+
"NOT_RELEVANT": {
|
| 152 |
+
"precision": 0.9316062176165804,
|
| 153 |
+
"recall": 0.9944690265486725,
|
| 154 |
+
"f1-score": 0.962011771000535,
|
| 155 |
+
"support": 904.0
|
| 156 |
+
},
|
| 157 |
+
"RELEVANT": {
|
| 158 |
+
"precision": 0.9019607843137255,
|
| 159 |
+
"recall": 0.4107142857142857,
|
| 160 |
+
"f1-score": 0.5644171779141104,
|
| 161 |
+
"support": 112.0
|
| 162 |
+
},
|
| 163 |
+
"accuracy": 0.9301181102362205,
|
| 164 |
+
"macro avg": {
|
| 165 |
+
"precision": 0.9167835009651529,
|
| 166 |
+
"recall": 0.7025916561314791,
|
| 167 |
+
"f1-score": 0.7632144744573227,
|
| 168 |
+
"support": 1016.0
|
| 169 |
+
},
|
| 170 |
+
"weighted avg": {
|
| 171 |
+
"precision": 0.9283382170950057,
|
| 172 |
+
"recall": 0.9301181102362205,
|
| 173 |
+
"f1-score": 0.9181824457784095,
|
| 174 |
+
"support": 1016.0
|
| 175 |
+
}
|
| 176 |
+
},
|
| 177 |
+
"roc_auc": 0.8990004740834386,
|
| 178 |
+
"average_precision": 0.7262348311700503
|
| 179 |
+
}
|
| 180 |
+
},
|
| 181 |
+
{
|
| 182 |
+
"model_type": "xgboost_tfidf",
|
| 183 |
+
"model_name": "xgboost",
|
| 184 |
+
"artifact_dir": "/content/agri-utilization-classifier/baselines/xgboost",
|
| 185 |
+
"artifact_file": "/content/agri-utilization-classifier/baselines/xgboost/xgboost_tfidf.joblib",
|
| 186 |
+
"validation_best_threshold": {
|
| 187 |
+
"threshold": 0.17728303372859955,
|
| 188 |
+
"f1": 0.5806451612903226,
|
| 189 |
+
"precision": 0.5921052631578947,
|
| 190 |
+
"recall": 0.569620253164557
|
| 191 |
+
},
|
| 192 |
+
"test_default_0_5": {
|
| 193 |
+
"threshold": 0.5,
|
| 194 |
+
"accuracy": 0.9242125984251969,
|
| 195 |
+
"precision": 1.0,
|
| 196 |
+
"recall": 0.3125,
|
| 197 |
+
"f1": 0.47619047619047616,
|
| 198 |
+
"confusion_matrix": [
|
| 199 |
+
[
|
| 200 |
+
904,
|
| 201 |
+
0
|
| 202 |
+
],
|
| 203 |
+
[
|
| 204 |
+
77,
|
| 205 |
+
35
|
| 206 |
+
]
|
| 207 |
+
],
|
| 208 |
+
"classification_report": {
|
| 209 |
+
"NOT_RELEVANT": {
|
| 210 |
+
"precision": 0.9215086646279307,
|
| 211 |
+
"recall": 1.0,
|
| 212 |
+
"f1-score": 0.9591511936339523,
|
| 213 |
+
"support": 904.0
|
| 214 |
+
},
|
| 215 |
+
"RELEVANT": {
|
| 216 |
+
"precision": 1.0,
|
| 217 |
+
"recall": 0.3125,
|
| 218 |
+
"f1-score": 0.47619047619047616,
|
| 219 |
+
"support": 112.0
|
| 220 |
+
},
|
| 221 |
+
"accuracy": 0.9242125984251969,
|
| 222 |
+
"macro avg": {
|
| 223 |
+
"precision": 0.9607543323139653,
|
| 224 |
+
"recall": 0.65625,
|
| 225 |
+
"f1-score": 0.7176708349122143,
|
| 226 |
+
"support": 1016.0
|
| 227 |
+
},
|
| 228 |
+
"weighted avg": {
|
| 229 |
+
"precision": 0.9301612527791825,
|
| 230 |
+
"recall": 0.9242125984251969,
|
| 231 |
+
"f1-score": 0.905911429506325,
|
| 232 |
+
"support": 1016.0
|
| 233 |
+
}
|
| 234 |
+
},
|
| 235 |
+
"roc_auc": 0.8921114491150443,
|
| 236 |
+
"average_precision": 0.6916666494483661
|
| 237 |
+
},
|
| 238 |
+
"test_optimal_threshold": {
|
| 239 |
+
"threshold": 0.17728303372859955,
|
| 240 |
+
"accuracy": 0.9183070866141733,
|
| 241 |
+
"precision": 0.6629213483146067,
|
| 242 |
+
"recall": 0.5267857142857143,
|
| 243 |
+
"f1": 0.5870646766169154,
|
| 244 |
+
"confusion_matrix": [
|
| 245 |
+
[
|
| 246 |
+
874,
|
| 247 |
+
30
|
| 248 |
+
],
|
| 249 |
+
[
|
| 250 |
+
53,
|
| 251 |
+
59
|
| 252 |
+
]
|
| 253 |
+
],
|
| 254 |
+
"classification_report": {
|
| 255 |
+
"NOT_RELEVANT": {
|
| 256 |
+
"precision": 0.9428263214670982,
|
| 257 |
+
"recall": 0.9668141592920354,
|
| 258 |
+
"f1-score": 0.9546695794647734,
|
| 259 |
+
"support": 904.0
|
| 260 |
+
},
|
| 261 |
+
"RELEVANT": {
|
| 262 |
+
"precision": 0.6629213483146067,
|
| 263 |
+
"recall": 0.5267857142857143,
|
| 264 |
+
"f1-score": 0.5870646766169154,
|
| 265 |
+
"support": 112.0
|
| 266 |
+
},
|
| 267 |
+
"accuracy": 0.9183070866141733,
|
| 268 |
+
"macro avg": {
|
| 269 |
+
"precision": 0.8028738348908524,
|
| 270 |
+
"recall": 0.7467999367888749,
|
| 271 |
+
"f1-score": 0.7708671280408443,
|
| 272 |
+
"support": 1016.0
|
| 273 |
+
},
|
| 274 |
+
"weighted avg": {
|
| 275 |
+
"precision": 0.9119706551353274,
|
| 276 |
+
"recall": 0.9183070866141733,
|
| 277 |
+
"f1-score": 0.9141462043476867,
|
| 278 |
+
"support": 1016.0
|
| 279 |
+
}
|
| 280 |
+
},
|
| 281 |
+
"roc_auc": 0.8921114491150443,
|
| 282 |
+
"average_precision": 0.6916666494483661
|
| 283 |
+
}
|
| 284 |
+
},
|
| 285 |
+
{
|
| 286 |
+
"model_type": "embedding-logistic_sentence_embeddings",
|
| 287 |
+
"model_name": "logistic",
|
| 288 |
+
"embedding_model_name": "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2",
|
| 289 |
+
"artifact_dir": "/content/agri-utilization-classifier/baselines/embedding-logistic",
|
| 290 |
+
"artifact_file": "/content/agri-utilization-classifier/baselines/embedding-logistic/embedding-logistic.joblib",
|
| 291 |
+
"validation_best_threshold": {
|
| 292 |
+
"threshold": 0.616087721531811,
|
| 293 |
+
"f1": 0.7282051282051282,
|
| 294 |
+
"precision": 0.6120689655172413,
|
| 295 |
+
"recall": 0.8987341772151899
|
| 296 |
+
},
|
| 297 |
+
"test_default_0_5": {
|
| 298 |
+
"threshold": 0.5,
|
| 299 |
+
"accuracy": 0.8986220472440944,
|
| 300 |
+
"precision": 0.5243243243243243,
|
| 301 |
+
"recall": 0.8660714285714286,
|
| 302 |
+
"f1": 0.6531986531986532,
|
| 303 |
+
"confusion_matrix": [
|
| 304 |
+
[
|
| 305 |
+
816,
|
| 306 |
+
88
|
| 307 |
+
],
|
| 308 |
+
[
|
| 309 |
+
15,
|
| 310 |
+
97
|
| 311 |
+
]
|
| 312 |
+
],
|
| 313 |
+
"classification_report": {
|
| 314 |
+
"NOT_RELEVANT": {
|
| 315 |
+
"precision": 0.9819494584837545,
|
| 316 |
+
"recall": 0.9026548672566371,
|
| 317 |
+
"f1-score": 0.9406340057636887,
|
| 318 |
+
"support": 904.0
|
| 319 |
+
},
|
| 320 |
+
"RELEVANT": {
|
| 321 |
+
"precision": 0.5243243243243243,
|
| 322 |
+
"recall": 0.8660714285714286,
|
| 323 |
+
"f1-score": 0.6531986531986532,
|
| 324 |
+
"support": 112.0
|
| 325 |
+
},
|
| 326 |
+
"accuracy": 0.8986220472440944,
|
| 327 |
+
"macro avg": {
|
| 328 |
+
"precision": 0.7531368914040394,
|
| 329 |
+
"recall": 0.8843631479140328,
|
| 330 |
+
"f1-score": 0.796916329481171,
|
| 331 |
+
"support": 1016.0
|
| 332 |
+
},
|
| 333 |
+
"weighted avg": {
|
| 334 |
+
"precision": 0.9315025933008252,
|
| 335 |
+
"recall": 0.8986220472440944,
|
| 336 |
+
"f1-score": 0.9089482188667557,
|
| 337 |
+
"support": 1016.0
|
| 338 |
+
}
|
| 339 |
+
},
|
| 340 |
+
"roc_auc": 0.9523842446270544,
|
| 341 |
+
"average_precision": 0.7588349048416645
|
| 342 |
+
},
|
| 343 |
+
"test_optimal_threshold": {
|
| 344 |
+
"threshold": 0.616087721531811,
|
| 345 |
+
"accuracy": 0.9291338582677166,
|
| 346 |
+
"precision": 0.631578947368421,
|
| 347 |
+
"recall": 0.8571428571428571,
|
| 348 |
+
"f1": 0.7272727272727273,
|
| 349 |
+
"confusion_matrix": [
|
| 350 |
+
[
|
| 351 |
+
848,
|
| 352 |
+
56
|
| 353 |
+
],
|
| 354 |
+
[
|
| 355 |
+
16,
|
| 356 |
+
96
|
| 357 |
+
]
|
| 358 |
+
],
|
| 359 |
+
"classification_report": {
|
| 360 |
+
"NOT_RELEVANT": {
|
| 361 |
+
"precision": 0.9814814814814815,
|
| 362 |
+
"recall": 0.9380530973451328,
|
| 363 |
+
"f1-score": 0.9592760180995475,
|
| 364 |
+
"support": 904.0
|
| 365 |
+
},
|
| 366 |
+
"RELEVANT": {
|
| 367 |
+
"precision": 0.631578947368421,
|
| 368 |
+
"recall": 0.8571428571428571,
|
| 369 |
+
"f1-score": 0.7272727272727273,
|
| 370 |
+
"support": 112.0
|
| 371 |
+
},
|
| 372 |
+
"accuracy": 0.9291338582677166,
|
| 373 |
+
"macro avg": {
|
| 374 |
+
"precision": 0.8065302144249513,
|
| 375 |
+
"recall": 0.8975979772439949,
|
| 376 |
+
"f1-score": 0.8432743726861374,
|
| 377 |
+
"support": 1016.0
|
| 378 |
+
},
|
| 379 |
+
"weighted avg": {
|
| 380 |
+
"precision": 0.9429095485871283,
|
| 381 |
+
"recall": 0.9291338582677166,
|
| 382 |
+
"f1-score": 0.9337008521816303,
|
| 383 |
+
"support": 1016.0
|
| 384 |
+
}
|
| 385 |
+
},
|
| 386 |
+
"roc_auc": 0.9523842446270544,
|
| 387 |
+
"average_precision": 0.7588349048416645
|
| 388 |
+
}
|
| 389 |
+
},
|
| 390 |
+
{
|
| 391 |
+
"model_type": "embedding-svm_sentence_embeddings",
|
| 392 |
+
"model_name": "svm",
|
| 393 |
+
"embedding_model_name": "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2",
|
| 394 |
+
"artifact_dir": "/content/agri-utilization-classifier/baselines/embedding-svm",
|
| 395 |
+
"artifact_file": "/content/agri-utilization-classifier/baselines/embedding-svm/embedding-svm.joblib",
|
| 396 |
+
"validation_best_threshold": {
|
| 397 |
+
"threshold": 0.27629376276966117,
|
| 398 |
+
"f1": 0.7314285714285714,
|
| 399 |
+
"precision": 0.6666666666666666,
|
| 400 |
+
"recall": 0.810126582278481
|
| 401 |
+
},
|
| 402 |
+
"test_default_0_5": {
|
| 403 |
+
"threshold": 0.5,
|
| 404 |
+
"accuracy": 0.9409448818897638,
|
| 405 |
+
"precision": 0.7708333333333334,
|
| 406 |
+
"recall": 0.6607142857142857,
|
| 407 |
+
"f1": 0.7115384615384616,
|
| 408 |
+
"confusion_matrix": [
|
| 409 |
+
[
|
| 410 |
+
882,
|
| 411 |
+
22
|
| 412 |
+
],
|
| 413 |
+
[
|
| 414 |
+
38,
|
| 415 |
+
74
|
| 416 |
+
]
|
| 417 |
+
],
|
| 418 |
+
"classification_report": {
|
| 419 |
+
"NOT_RELEVANT": {
|
| 420 |
+
"precision": 0.9586956521739131,
|
| 421 |
+
"recall": 0.9756637168141593,
|
| 422 |
+
"f1-score": 0.9671052631578947,
|
| 423 |
+
"support": 904.0
|
| 424 |
+
},
|
| 425 |
+
"RELEVANT": {
|
| 426 |
+
"precision": 0.7708333333333334,
|
| 427 |
+
"recall": 0.6607142857142857,
|
| 428 |
+
"f1-score": 0.7115384615384616,
|
| 429 |
+
"support": 112.0
|
| 430 |
+
},
|
| 431 |
+
"accuracy": 0.9409448818897638,
|
| 432 |
+
"macro avg": {
|
| 433 |
+
"precision": 0.8647644927536232,
|
| 434 |
+
"recall": 0.8181890012642226,
|
| 435 |
+
"f1-score": 0.8393218623481782,
|
| 436 |
+
"support": 1016.0
|
| 437 |
+
},
|
| 438 |
+
"weighted avg": {
|
| 439 |
+
"precision": 0.9379864201757389,
|
| 440 |
+
"recall": 0.9409448818897638,
|
| 441 |
+
"f1-score": 0.9389325448691382,
|
| 442 |
+
"support": 1016.0
|
| 443 |
+
}
|
| 444 |
+
},
|
| 445 |
+
"roc_auc": 0.9517817635903919,
|
| 446 |
+
"average_precision": 0.743247391124005
|
| 447 |
+
},
|
| 448 |
+
"test_optimal_threshold": {
|
| 449 |
+
"threshold": 0.27629376276966117,
|
| 450 |
+
"accuracy": 0.9350393700787402,
|
| 451 |
+
"precision": 0.6666666666666666,
|
| 452 |
+
"recall": 0.8214285714285714,
|
| 453 |
+
"f1": 0.736,
|
| 454 |
+
"confusion_matrix": [
|
| 455 |
+
[
|
| 456 |
+
858,
|
| 457 |
+
46
|
| 458 |
+
],
|
| 459 |
+
[
|
| 460 |
+
20,
|
| 461 |
+
92
|
| 462 |
+
]
|
| 463 |
+
],
|
| 464 |
+
"classification_report": {
|
| 465 |
+
"NOT_RELEVANT": {
|
| 466 |
+
"precision": 0.9772209567198178,
|
| 467 |
+
"recall": 0.9491150442477876,
|
| 468 |
+
"f1-score": 0.9629629629629629,
|
| 469 |
+
"support": 904.0
|
| 470 |
+
},
|
| 471 |
+
"RELEVANT": {
|
| 472 |
+
"precision": 0.6666666666666666,
|
| 473 |
+
"recall": 0.8214285714285714,
|
| 474 |
+
"f1-score": 0.736,
|
| 475 |
+
"support": 112.0
|
| 476 |
+
},
|
| 477 |
+
"accuracy": 0.9350393700787402,
|
| 478 |
+
"macro avg": {
|
| 479 |
+
"precision": 0.8219438116932423,
|
| 480 |
+
"recall": 0.8852718078381795,
|
| 481 |
+
"f1-score": 0.8494814814814815,
|
| 482 |
+
"support": 1016.0
|
| 483 |
+
},
|
| 484 |
+
"weighted avg": {
|
| 485 |
+
"precision": 0.9429866255328562,
|
| 486 |
+
"recall": 0.9350393700787402,
|
| 487 |
+
"f1-score": 0.9379434237386993,
|
| 488 |
+
"support": 1016.0
|
| 489 |
+
}
|
| 490 |
+
},
|
| 491 |
+
"roc_auc": 0.9517817635903919,
|
| 492 |
+
"average_precision": 0.743247391124005
|
| 493 |
+
}
|
| 494 |
+
},
|
| 495 |
+
{
|
| 496 |
+
"model_type": "embedding-lightgbm_sentence_embeddings",
|
| 497 |
+
"model_name": "lightgbm",
|
| 498 |
+
"embedding_model_name": "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2",
|
| 499 |
+
"artifact_dir": "/content/agri-utilization-classifier/baselines/embedding-lightgbm",
|
| 500 |
+
"artifact_file": "/content/agri-utilization-classifier/baselines/embedding-lightgbm/embedding-lightgbm.joblib",
|
| 501 |
+
"validation_best_threshold": {
|
| 502 |
+
"threshold": 0.05244099185733503,
|
| 503 |
+
"f1": 0.7386363636363636,
|
| 504 |
+
"precision": 0.6701030927835051,
|
| 505 |
+
"recall": 0.8227848101265823
|
| 506 |
+
},
|
| 507 |
+
"test_default_0_5": {
|
| 508 |
+
"threshold": 0.5,
|
| 509 |
+
"accuracy": 0.9458661417322834,
|
| 510 |
+
"precision": 0.7878787878787878,
|
| 511 |
+
"recall": 0.6964285714285714,
|
| 512 |
+
"f1": 0.7393364928909952,
|
| 513 |
+
"confusion_matrix": [
|
| 514 |
+
[
|
| 515 |
+
883,
|
| 516 |
+
21
|
| 517 |
+
],
|
| 518 |
+
[
|
| 519 |
+
34,
|
| 520 |
+
78
|
| 521 |
+
]
|
| 522 |
+
],
|
| 523 |
+
"classification_report": {
|
| 524 |
+
"NOT_RELEVANT": {
|
| 525 |
+
"precision": 0.9629225736095965,
|
| 526 |
+
"recall": 0.9767699115044248,
|
| 527 |
+
"f1-score": 0.9697968149368479,
|
| 528 |
+
"support": 904.0
|
| 529 |
+
},
|
| 530 |
+
"RELEVANT": {
|
| 531 |
+
"precision": 0.7878787878787878,
|
| 532 |
+
"recall": 0.6964285714285714,
|
| 533 |
+
"f1-score": 0.7393364928909952,
|
| 534 |
+
"support": 112.0
|
| 535 |
+
},
|
| 536 |
+
"accuracy": 0.9458661417322834,
|
| 537 |
+
"macro avg": {
|
| 538 |
+
"precision": 0.8754006807441922,
|
| 539 |
+
"recall": 0.8365992414664981,
|
| 540 |
+
"f1-score": 0.8545666539139216,
|
| 541 |
+
"support": 1016.0
|
| 542 |
+
},
|
| 543 |
+
"weighted avg": {
|
| 544 |
+
"precision": 0.9436264082534445,
|
| 545 |
+
"recall": 0.9458661417322834,
|
| 546 |
+
"f1-score": 0.9443917400656515,
|
| 547 |
+
"support": 1016.0
|
| 548 |
+
}
|
| 549 |
+
},
|
| 550 |
+
"roc_auc": 0.9585078223767383,
|
| 551 |
+
"average_precision": 0.8011064601086128
|
| 552 |
+
},
|
| 553 |
+
"test_optimal_threshold": {
|
| 554 |
+
"threshold": 0.05244099185733503,
|
| 555 |
+
"accuracy": 0.9330708661417323,
|
| 556 |
+
"precision": 0.6571428571428571,
|
| 557 |
+
"recall": 0.8214285714285714,
|
| 558 |
+
"f1": 0.7301587301587301,
|
| 559 |
+
"confusion_matrix": [
|
| 560 |
+
[
|
| 561 |
+
856,
|
| 562 |
+
48
|
| 563 |
+
],
|
| 564 |
+
[
|
| 565 |
+
20,
|
| 566 |
+
92
|
| 567 |
+
]
|
| 568 |
+
],
|
| 569 |
+
"classification_report": {
|
| 570 |
+
"NOT_RELEVANT": {
|
| 571 |
+
"precision": 0.9771689497716894,
|
| 572 |
+
"recall": 0.9469026548672567,
|
| 573 |
+
"f1-score": 0.9617977528089887,
|
| 574 |
+
"support": 904.0
|
| 575 |
+
},
|
| 576 |
+
"RELEVANT": {
|
| 577 |
+
"precision": 0.6571428571428571,
|
| 578 |
+
"recall": 0.8214285714285714,
|
| 579 |
+
"f1-score": 0.7301587301587301,
|
| 580 |
+
"support": 112.0
|
| 581 |
+
},
|
| 582 |
+
"accuracy": 0.9330708661417323,
|
| 583 |
+
"macro avg": {
|
| 584 |
+
"precision": 0.8171559034572733,
|
| 585 |
+
"recall": 0.8841656131479141,
|
| 586 |
+
"f1-score": 0.8459782414838595,
|
| 587 |
+
"support": 1016.0
|
| 588 |
+
},
|
| 589 |
+
"weighted avg": {
|
| 590 |
+
"precision": 0.9418904828677237,
|
| 591 |
+
"recall": 0.9330708661417323,
|
| 592 |
+
"f1-score": 0.936262742438094,
|
| 593 |
+
"support": 1016.0
|
| 594 |
+
}
|
| 595 |
+
},
|
| 596 |
+
"roc_auc": 0.9585078223767383,
|
| 597 |
+
"average_precision": 0.8011064601086128
|
| 598 |
+
}
|
| 599 |
+
},
|
| 600 |
+
{
|
| 601 |
+
"model_type": "transformer",
|
| 602 |
+
"model_name": "FacebookAI/xlm-roberta-base",
|
| 603 |
+
"artifact_dir": "/content/agri-utilization-classifier/transformer",
|
| 604 |
+
"validation_best_threshold": {
|
| 605 |
+
"threshold": 0.6156440377235413,
|
| 606 |
+
"f1": 0.8072289156626505,
|
| 607 |
+
"precision": 0.7701149425287356,
|
| 608 |
+
"recall": 0.8481012658227848
|
| 609 |
+
},
|
| 610 |
+
"test_default_0_5": {
|
| 611 |
+
"threshold": 0.5,
|
| 612 |
+
"accuracy": 0.9498031496062992,
|
| 613 |
+
"precision": 0.7479674796747967,
|
| 614 |
+
"recall": 0.8214285714285714,
|
| 615 |
+
"f1": 0.7829787234042553,
|
| 616 |
+
"confusion_matrix": [
|
| 617 |
+
[
|
| 618 |
+
873,
|
| 619 |
+
31
|
| 620 |
+
],
|
| 621 |
+
[
|
| 622 |
+
20,
|
| 623 |
+
92
|
| 624 |
+
]
|
| 625 |
+
],
|
| 626 |
+
"classification_report": {
|
| 627 |
+
"NOT_RELEVANT": {
|
| 628 |
+
"precision": 0.9776035834266518,
|
| 629 |
+
"recall": 0.9657079646017699,
|
| 630 |
+
"f1-score": 0.9716193656093489,
|
| 631 |
+
"support": 904.0
|
| 632 |
+
},
|
| 633 |
+
"RELEVANT": {
|
| 634 |
+
"precision": 0.7479674796747967,
|
| 635 |
+
"recall": 0.8214285714285714,
|
| 636 |
+
"f1-score": 0.7829787234042553,
|
| 637 |
+
"support": 112.0
|
| 638 |
+
},
|
| 639 |
+
"accuracy": 0.9498031496062992,
|
| 640 |
+
"macro avg": {
|
| 641 |
+
"precision": 0.8627855315507242,
|
| 642 |
+
"recall": 0.8935682680151706,
|
| 643 |
+
"f1-score": 0.8772990445068021,
|
| 644 |
+
"support": 1016.0
|
| 645 |
+
},
|
| 646 |
+
"weighted avg": {
|
| 647 |
+
"precision": 0.9522893672650299,
|
| 648 |
+
"recall": 0.9498031496062992,
|
| 649 |
+
"f1-score": 0.9508243341851654,
|
| 650 |
+
"support": 1016.0
|
| 651 |
+
}
|
| 652 |
+
},
|
| 653 |
+
"roc_auc": 0.9511694058154235,
|
| 654 |
+
"average_precision": 0.7846734208461954
|
| 655 |
+
},
|
| 656 |
+
"test_optimal_threshold": {
|
| 657 |
+
"threshold": 0.6156440377235413,
|
| 658 |
+
"accuracy": 0.9498031496062992,
|
| 659 |
+
"precision": 0.7479674796747967,
|
| 660 |
+
"recall": 0.8214285714285714,
|
| 661 |
+
"f1": 0.7829787234042553,
|
| 662 |
+
"confusion_matrix": [
|
| 663 |
+
[
|
| 664 |
+
873,
|
| 665 |
+
31
|
| 666 |
+
],
|
| 667 |
+
[
|
| 668 |
+
20,
|
| 669 |
+
92
|
| 670 |
+
]
|
| 671 |
+
],
|
| 672 |
+
"classification_report": {
|
| 673 |
+
"NOT_RELEVANT": {
|
| 674 |
+
"precision": 0.9776035834266518,
|
| 675 |
+
"recall": 0.9657079646017699,
|
| 676 |
+
"f1-score": 0.9716193656093489,
|
| 677 |
+
"support": 904.0
|
| 678 |
+
},
|
| 679 |
+
"RELEVANT": {
|
| 680 |
+
"precision": 0.7479674796747967,
|
| 681 |
+
"recall": 0.8214285714285714,
|
| 682 |
+
"f1-score": 0.7829787234042553,
|
| 683 |
+
"support": 112.0
|
| 684 |
+
},
|
| 685 |
+
"accuracy": 0.9498031496062992,
|
| 686 |
+
"macro avg": {
|
| 687 |
+
"precision": 0.8627855315507242,
|
| 688 |
+
"recall": 0.8935682680151706,
|
| 689 |
+
"f1-score": 0.8772990445068021,
|
| 690 |
+
"support": 1016.0
|
| 691 |
+
},
|
| 692 |
+
"weighted avg": {
|
| 693 |
+
"precision": 0.9522893672650299,
|
| 694 |
+
"recall": 0.9498031496062992,
|
| 695 |
+
"f1-score": 0.9508243341851654,
|
| 696 |
+
"support": 1016.0
|
| 697 |
+
}
|
| 698 |
+
},
|
| 699 |
+
"roc_auc": 0.9511694058154235,
|
| 700 |
+
"average_precision": 0.7846734208461954
|
| 701 |
+
}
|
| 702 |
+
}
|
| 703 |
+
]
|
| 704 |
+
}
|
transformer/checkpoint-1220/config.json
ADDED
|
@@ -0,0 +1,39 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"add_cross_attention": false,
|
| 3 |
+
"architectures": [
|
| 4 |
+
"XLMRobertaForSequenceClassification"
|
| 5 |
+
],
|
| 6 |
+
"attention_probs_dropout_prob": 0.1,
|
| 7 |
+
"bos_token_id": 0,
|
| 8 |
+
"classifier_dropout": null,
|
| 9 |
+
"dtype": "float32",
|
| 10 |
+
"eos_token_id": 2,
|
| 11 |
+
"hidden_act": "gelu",
|
| 12 |
+
"hidden_dropout_prob": 0.1,
|
| 13 |
+
"hidden_size": 768,
|
| 14 |
+
"id2label": {
|
| 15 |
+
"0": "NOT_RELEVANT",
|
| 16 |
+
"1": "RELEVANT"
|
| 17 |
+
},
|
| 18 |
+
"initializer_range": 0.02,
|
| 19 |
+
"intermediate_size": 3072,
|
| 20 |
+
"is_decoder": false,
|
| 21 |
+
"label2id": {
|
| 22 |
+
"NOT_RELEVANT": 0,
|
| 23 |
+
"RELEVANT": 1
|
| 24 |
+
},
|
| 25 |
+
"layer_norm_eps": 1e-05,
|
| 26 |
+
"max_position_embeddings": 514,
|
| 27 |
+
"model_type": "xlm-roberta",
|
| 28 |
+
"num_attention_heads": 12,
|
| 29 |
+
"num_hidden_layers": 12,
|
| 30 |
+
"output_past": true,
|
| 31 |
+
"pad_token_id": 1,
|
| 32 |
+
"position_embedding_type": "absolute",
|
| 33 |
+
"problem_type": "single_label_classification",
|
| 34 |
+
"tie_word_embeddings": true,
|
| 35 |
+
"transformers_version": "5.9.0",
|
| 36 |
+
"type_vocab_size": 1,
|
| 37 |
+
"use_cache": false,
|
| 38 |
+
"vocab_size": 250002
|
| 39 |
+
}
|
transformer/checkpoint-1220/model.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:23260da79af693d20fe8414504a730a2e1fae0128c96e784d5a388b934f65ef8
|
| 3 |
+
size 1112205008
|
transformer/checkpoint-1220/optimizer.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f5656e89640a42e9a162840684d7e143689ced913e28cbc8709b4e3fcc33ea94
|
| 3 |
+
size 2224532875
|
transformer/checkpoint-1220/rng_state.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e47023fdf7fee85f2c66207ee2960719b8bf1b11c2d946d75e0d2fe33113c7ce
|
| 3 |
+
size 14645
|
transformer/checkpoint-1220/scaler.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7ad54f7b6fbdb8393459d5a595aa5dfdf4cf4c483f044be07288464f573b4d8e
|
| 3 |
+
size 1383
|
transformer/checkpoint-1220/scheduler.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f6adfd2a8e363fb5adf050a01658d698ef3da72d5e9b197063c5e3b6a0fe9333
|
| 3 |
+
size 1465
|
transformer/checkpoint-1220/tokenizer.json
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:cc02d42fb2a10276563109e2287cc0dbe6b595d5b3b3401c7cfeffc0b7e20270
|
| 3 |
+
size 17098351
|
transformer/checkpoint-1220/tokenizer_config.json
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"add_prefix_space": true,
|
| 3 |
+
"backend": "tokenizers",
|
| 4 |
+
"bos_token": "<s>",
|
| 5 |
+
"cls_token": "<s>",
|
| 6 |
+
"eos_token": "</s>",
|
| 7 |
+
"is_local": false,
|
| 8 |
+
"local_files_only": false,
|
| 9 |
+
"mask_token": "<mask>",
|
| 10 |
+
"model_max_length": 512,
|
| 11 |
+
"pad_token": "<pad>",
|
| 12 |
+
"sep_token": "</s>",
|
| 13 |
+
"tokenizer_class": "XLMRobertaTokenizer",
|
| 14 |
+
"unk_token": "<unk>"
|
| 15 |
+
}
|
transformer/checkpoint-1220/trainer_state.json
ADDED
|
@@ -0,0 +1,431 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"best_global_step": 1220,
|
| 3 |
+
"best_metric": 0.8,
|
| 4 |
+
"best_model_checkpoint": "/content/agri-utilization-classifier/transformer/checkpoint-1220",
|
| 5 |
+
"epoch": 4.0,
|
| 6 |
+
"eval_steps": 500,
|
| 7 |
+
"global_step": 1220,
|
| 8 |
+
"is_hyper_param_search": false,
|
| 9 |
+
"is_local_process_zero": true,
|
| 10 |
+
"is_world_process_zero": true,
|
| 11 |
+
"log_history": [
|
| 12 |
+
{
|
| 13 |
+
"epoch": 0.08196721311475409,
|
| 14 |
+
"grad_norm": 6.984184741973877,
|
| 15 |
+
"learning_rate": 3.157894736842105e-06,
|
| 16 |
+
"loss": 0.7167730712890625,
|
| 17 |
+
"step": 25
|
| 18 |
+
},
|
| 19 |
+
{
|
| 20 |
+
"epoch": 0.16393442622950818,
|
| 21 |
+
"grad_norm": 9.77598762512207,
|
| 22 |
+
"learning_rate": 6.447368421052632e-06,
|
| 23 |
+
"loss": 0.5636273193359375,
|
| 24 |
+
"step": 50
|
| 25 |
+
},
|
| 26 |
+
{
|
| 27 |
+
"epoch": 0.2459016393442623,
|
| 28 |
+
"grad_norm": 8.441609382629395,
|
| 29 |
+
"learning_rate": 9.736842105263159e-06,
|
| 30 |
+
"loss": 0.37406421661376954,
|
| 31 |
+
"step": 75
|
| 32 |
+
},
|
| 33 |
+
{
|
| 34 |
+
"epoch": 0.32786885245901637,
|
| 35 |
+
"grad_norm": 5.313694953918457,
|
| 36 |
+
"learning_rate": 1.3026315789473684e-05,
|
| 37 |
+
"loss": 0.2965927886962891,
|
| 38 |
+
"step": 100
|
| 39 |
+
},
|
| 40 |
+
{
|
| 41 |
+
"epoch": 0.4098360655737705,
|
| 42 |
+
"grad_norm": 7.240467548370361,
|
| 43 |
+
"learning_rate": 1.6315789473684213e-05,
|
| 44 |
+
"loss": 0.29742313385009767,
|
| 45 |
+
"step": 125
|
| 46 |
+
},
|
| 47 |
+
{
|
| 48 |
+
"epoch": 0.4918032786885246,
|
| 49 |
+
"grad_norm": 6.100603103637695,
|
| 50 |
+
"learning_rate": 1.960526315789474e-05,
|
| 51 |
+
"loss": 0.2068590545654297,
|
| 52 |
+
"step": 150
|
| 53 |
+
},
|
| 54 |
+
{
|
| 55 |
+
"epoch": 0.5737704918032787,
|
| 56 |
+
"grad_norm": 4.169040679931641,
|
| 57 |
+
"learning_rate": 1.9679533867443555e-05,
|
| 58 |
+
"loss": 0.21712726593017578,
|
| 59 |
+
"step": 175
|
| 60 |
+
},
|
| 61 |
+
{
|
| 62 |
+
"epoch": 0.6557377049180327,
|
| 63 |
+
"grad_norm": 4.665876865386963,
|
| 64 |
+
"learning_rate": 1.9315367807720323e-05,
|
| 65 |
+
"loss": 0.2889243125915527,
|
| 66 |
+
"step": 200
|
| 67 |
+
},
|
| 68 |
+
{
|
| 69 |
+
"epoch": 0.7377049180327869,
|
| 70 |
+
"grad_norm": 1.094870924949646,
|
| 71 |
+
"learning_rate": 1.8951201747997088e-05,
|
| 72 |
+
"loss": 0.2303921699523926,
|
| 73 |
+
"step": 225
|
| 74 |
+
},
|
| 75 |
+
{
|
| 76 |
+
"epoch": 0.819672131147541,
|
| 77 |
+
"grad_norm": 1.2164329290390015,
|
| 78 |
+
"learning_rate": 1.8587035688273852e-05,
|
| 79 |
+
"loss": 0.16723501205444335,
|
| 80 |
+
"step": 250
|
| 81 |
+
},
|
| 82 |
+
{
|
| 83 |
+
"epoch": 0.9016393442622951,
|
| 84 |
+
"grad_norm": 2.6314468383789062,
|
| 85 |
+
"learning_rate": 1.822286962855062e-05,
|
| 86 |
+
"loss": 0.15685997009277344,
|
| 87 |
+
"step": 275
|
| 88 |
+
},
|
| 89 |
+
{
|
| 90 |
+
"epoch": 0.9836065573770492,
|
| 91 |
+
"grad_norm": 14.927972793579102,
|
| 92 |
+
"learning_rate": 1.7858703568827385e-05,
|
| 93 |
+
"loss": 0.1979808807373047,
|
| 94 |
+
"step": 300
|
| 95 |
+
},
|
| 96 |
+
{
|
| 97 |
+
"epoch": 1.0,
|
| 98 |
+
"eval_accuracy": 0.9642126789366053,
|
| 99 |
+
"eval_f1": 0.7852760736196319,
|
| 100 |
+
"eval_loss": 0.10679091513156891,
|
| 101 |
+
"eval_precision": 0.7619047619047619,
|
| 102 |
+
"eval_recall": 0.810126582278481,
|
| 103 |
+
"eval_roc_auc": 0.9606665634108222,
|
| 104 |
+
"eval_runtime": 3.9998,
|
| 105 |
+
"eval_samples_per_second": 244.514,
|
| 106 |
+
"eval_steps_per_second": 7.75,
|
| 107 |
+
"step": 305
|
| 108 |
+
},
|
| 109 |
+
{
|
| 110 |
+
"epoch": 1.0655737704918034,
|
| 111 |
+
"grad_norm": 22.521757125854492,
|
| 112 |
+
"learning_rate": 1.7494537509104153e-05,
|
| 113 |
+
"loss": 0.1316550636291504,
|
| 114 |
+
"step": 325
|
| 115 |
+
},
|
| 116 |
+
{
|
| 117 |
+
"epoch": 1.1475409836065573,
|
| 118 |
+
"grad_norm": 0.22687062621116638,
|
| 119 |
+
"learning_rate": 1.7130371449380918e-05,
|
| 120 |
+
"loss": 0.2059168815612793,
|
| 121 |
+
"step": 350
|
| 122 |
+
},
|
| 123 |
+
{
|
| 124 |
+
"epoch": 1.2295081967213115,
|
| 125 |
+
"grad_norm": 0.657261312007904,
|
| 126 |
+
"learning_rate": 1.6766205389657686e-05,
|
| 127 |
+
"loss": 0.09930330276489258,
|
| 128 |
+
"step": 375
|
| 129 |
+
},
|
| 130 |
+
{
|
| 131 |
+
"epoch": 1.3114754098360657,
|
| 132 |
+
"grad_norm": 4.796896457672119,
|
| 133 |
+
"learning_rate": 1.640203932993445e-05,
|
| 134 |
+
"loss": 0.13251757621765137,
|
| 135 |
+
"step": 400
|
| 136 |
+
},
|
| 137 |
+
{
|
| 138 |
+
"epoch": 1.3934426229508197,
|
| 139 |
+
"grad_norm": 0.5394258499145508,
|
| 140 |
+
"learning_rate": 1.603787327021122e-05,
|
| 141 |
+
"loss": 0.17834033966064453,
|
| 142 |
+
"step": 425
|
| 143 |
+
},
|
| 144 |
+
{
|
| 145 |
+
"epoch": 1.4754098360655736,
|
| 146 |
+
"grad_norm": 0.4655781388282776,
|
| 147 |
+
"learning_rate": 1.5673707210487983e-05,
|
| 148 |
+
"loss": 0.122637300491333,
|
| 149 |
+
"step": 450
|
| 150 |
+
},
|
| 151 |
+
{
|
| 152 |
+
"epoch": 1.5573770491803278,
|
| 153 |
+
"grad_norm": 0.42849695682525635,
|
| 154 |
+
"learning_rate": 1.530954115076475e-05,
|
| 155 |
+
"loss": 0.15983641624450684,
|
| 156 |
+
"step": 475
|
| 157 |
+
},
|
| 158 |
+
{
|
| 159 |
+
"epoch": 1.639344262295082,
|
| 160 |
+
"grad_norm": 0.45386794209480286,
|
| 161 |
+
"learning_rate": 1.4945375091041516e-05,
|
| 162 |
+
"loss": 0.14264726638793945,
|
| 163 |
+
"step": 500
|
| 164 |
+
},
|
| 165 |
+
{
|
| 166 |
+
"epoch": 1.721311475409836,
|
| 167 |
+
"grad_norm": 0.6170782446861267,
|
| 168 |
+
"learning_rate": 1.4581209031318282e-05,
|
| 169 |
+
"loss": 0.18886091232299804,
|
| 170 |
+
"step": 525
|
| 171 |
+
},
|
| 172 |
+
{
|
| 173 |
+
"epoch": 1.8032786885245902,
|
| 174 |
+
"grad_norm": 5.4546122550964355,
|
| 175 |
+
"learning_rate": 1.4217042971595047e-05,
|
| 176 |
+
"loss": 0.14393989562988282,
|
| 177 |
+
"step": 550
|
| 178 |
+
},
|
| 179 |
+
{
|
| 180 |
+
"epoch": 1.8852459016393444,
|
| 181 |
+
"grad_norm": 17.419189453125,
|
| 182 |
+
"learning_rate": 1.3852876911871815e-05,
|
| 183 |
+
"loss": 0.10272212982177735,
|
| 184 |
+
"step": 575
|
| 185 |
+
},
|
| 186 |
+
{
|
| 187 |
+
"epoch": 1.9672131147540983,
|
| 188 |
+
"grad_norm": 1.6267497539520264,
|
| 189 |
+
"learning_rate": 1.3488710852148582e-05,
|
| 190 |
+
"loss": 0.1810975456237793,
|
| 191 |
+
"step": 600
|
| 192 |
+
},
|
| 193 |
+
{
|
| 194 |
+
"epoch": 2.0,
|
| 195 |
+
"eval_accuracy": 0.9580777096114519,
|
| 196 |
+
"eval_f1": 0.7759562841530054,
|
| 197 |
+
"eval_loss": 0.17126062512397766,
|
| 198 |
+
"eval_precision": 0.6826923076923077,
|
| 199 |
+
"eval_recall": 0.8987341772151899,
|
| 200 |
+
"eval_roc_auc": 0.9631939848777121,
|
| 201 |
+
"eval_runtime": 3.84,
|
| 202 |
+
"eval_samples_per_second": 254.687,
|
| 203 |
+
"eval_steps_per_second": 8.073,
|
| 204 |
+
"step": 610
|
| 205 |
+
},
|
| 206 |
+
{
|
| 207 |
+
"epoch": 2.0491803278688523,
|
| 208 |
+
"grad_norm": 1.3180276155471802,
|
| 209 |
+
"learning_rate": 1.3124544792425346e-05,
|
| 210 |
+
"loss": 0.05921304225921631,
|
| 211 |
+
"step": 625
|
| 212 |
+
},
|
| 213 |
+
{
|
| 214 |
+
"epoch": 2.1311475409836067,
|
| 215 |
+
"grad_norm": 5.675038814544678,
|
| 216 |
+
"learning_rate": 1.2760378732702113e-05,
|
| 217 |
+
"loss": 0.16824769973754883,
|
| 218 |
+
"step": 650
|
| 219 |
+
},
|
| 220 |
+
{
|
| 221 |
+
"epoch": 2.2131147540983607,
|
| 222 |
+
"grad_norm": 0.16993092000484467,
|
| 223 |
+
"learning_rate": 1.239621267297888e-05,
|
| 224 |
+
"loss": 0.12186273574829101,
|
| 225 |
+
"step": 675
|
| 226 |
+
},
|
| 227 |
+
{
|
| 228 |
+
"epoch": 2.2950819672131146,
|
| 229 |
+
"grad_norm": 1.1791695356369019,
|
| 230 |
+
"learning_rate": 1.2032046613255645e-05,
|
| 231 |
+
"loss": 0.08795836448669433,
|
| 232 |
+
"step": 700
|
| 233 |
+
},
|
| 234 |
+
{
|
| 235 |
+
"epoch": 2.3770491803278686,
|
| 236 |
+
"grad_norm": 0.07541065663099289,
|
| 237 |
+
"learning_rate": 1.1667880553532412e-05,
|
| 238 |
+
"loss": 0.10129087448120117,
|
| 239 |
+
"step": 725
|
| 240 |
+
},
|
| 241 |
+
{
|
| 242 |
+
"epoch": 2.459016393442623,
|
| 243 |
+
"grad_norm": 3.394912004470825,
|
| 244 |
+
"learning_rate": 1.1303714493809176e-05,
|
| 245 |
+
"loss": 0.14056243896484374,
|
| 246 |
+
"step": 750
|
| 247 |
+
},
|
| 248 |
+
{
|
| 249 |
+
"epoch": 2.540983606557377,
|
| 250 |
+
"grad_norm": 8.074258804321289,
|
| 251 |
+
"learning_rate": 1.0939548434085944e-05,
|
| 252 |
+
"loss": 0.06563093185424805,
|
| 253 |
+
"step": 775
|
| 254 |
+
},
|
| 255 |
+
{
|
| 256 |
+
"epoch": 2.6229508196721314,
|
| 257 |
+
"grad_norm": 12.472029685974121,
|
| 258 |
+
"learning_rate": 1.057538237436271e-05,
|
| 259 |
+
"loss": 0.09851057052612305,
|
| 260 |
+
"step": 800
|
| 261 |
+
},
|
| 262 |
+
{
|
| 263 |
+
"epoch": 2.7049180327868854,
|
| 264 |
+
"grad_norm": 0.10368915647268295,
|
| 265 |
+
"learning_rate": 1.0211216314639475e-05,
|
| 266 |
+
"loss": 0.11658324241638184,
|
| 267 |
+
"step": 825
|
| 268 |
+
},
|
| 269 |
+
{
|
| 270 |
+
"epoch": 2.7868852459016393,
|
| 271 |
+
"grad_norm": 44.263092041015625,
|
| 272 |
+
"learning_rate": 9.847050254916243e-06,
|
| 273 |
+
"loss": 0.13634946823120117,
|
| 274 |
+
"step": 850
|
| 275 |
+
},
|
| 276 |
+
{
|
| 277 |
+
"epoch": 2.8688524590163933,
|
| 278 |
+
"grad_norm": 0.07709958404302597,
|
| 279 |
+
"learning_rate": 9.482884195193008e-06,
|
| 280 |
+
"loss": 0.12144805908203125,
|
| 281 |
+
"step": 875
|
| 282 |
+
},
|
| 283 |
+
{
|
| 284 |
+
"epoch": 2.9508196721311473,
|
| 285 |
+
"grad_norm": 0.11255892366170883,
|
| 286 |
+
"learning_rate": 9.118718135469774e-06,
|
| 287 |
+
"loss": 0.11815821647644043,
|
| 288 |
+
"step": 900
|
| 289 |
+
},
|
| 290 |
+
{
|
| 291 |
+
"epoch": 3.0,
|
| 292 |
+
"eval_accuracy": 0.9601226993865031,
|
| 293 |
+
"eval_f1": 0.7868852459016393,
|
| 294 |
+
"eval_loss": 0.14711864292621613,
|
| 295 |
+
"eval_precision": 0.6923076923076923,
|
| 296 |
+
"eval_recall": 0.9113924050632911,
|
| 297 |
+
"eval_roc_auc": 0.9719449177003984,
|
| 298 |
+
"eval_runtime": 3.9043,
|
| 299 |
+
"eval_samples_per_second": 250.495,
|
| 300 |
+
"eval_steps_per_second": 7.94,
|
| 301 |
+
"step": 915
|
| 302 |
+
},
|
| 303 |
+
{
|
| 304 |
+
"epoch": 3.0327868852459017,
|
| 305 |
+
"grad_norm": 0.2808685302734375,
|
| 306 |
+
"learning_rate": 8.754552075746541e-06,
|
| 307 |
+
"loss": 0.1051255989074707,
|
| 308 |
+
"step": 925
|
| 309 |
+
},
|
| 310 |
+
{
|
| 311 |
+
"epoch": 3.1147540983606556,
|
| 312 |
+
"grad_norm": 0.07298991084098816,
|
| 313 |
+
"learning_rate": 8.390386016023307e-06,
|
| 314 |
+
"loss": 0.08817357063293457,
|
| 315 |
+
"step": 950
|
| 316 |
+
},
|
| 317 |
+
{
|
| 318 |
+
"epoch": 3.19672131147541,
|
| 319 |
+
"grad_norm": 0.049921419471502304,
|
| 320 |
+
"learning_rate": 8.026219956300074e-06,
|
| 321 |
+
"loss": 0.11110530853271484,
|
| 322 |
+
"step": 975
|
| 323 |
+
},
|
| 324 |
+
{
|
| 325 |
+
"epoch": 3.278688524590164,
|
| 326 |
+
"grad_norm": 1.874350905418396,
|
| 327 |
+
"learning_rate": 7.66205389657684e-06,
|
| 328 |
+
"loss": 0.09003183364868164,
|
| 329 |
+
"step": 1000
|
| 330 |
+
},
|
| 331 |
+
{
|
| 332 |
+
"epoch": 3.360655737704918,
|
| 333 |
+
"grad_norm": 0.09576287865638733,
|
| 334 |
+
"learning_rate": 7.2978878368536055e-06,
|
| 335 |
+
"loss": 0.05897871017456055,
|
| 336 |
+
"step": 1025
|
| 337 |
+
},
|
| 338 |
+
{
|
| 339 |
+
"epoch": 3.442622950819672,
|
| 340 |
+
"grad_norm": 20.84284019470215,
|
| 341 |
+
"learning_rate": 6.933721777130372e-06,
|
| 342 |
+
"loss": 0.06021720886230469,
|
| 343 |
+
"step": 1050
|
| 344 |
+
},
|
| 345 |
+
{
|
| 346 |
+
"epoch": 3.5245901639344264,
|
| 347 |
+
"grad_norm": 0.06452233344316483,
|
| 348 |
+
"learning_rate": 6.569555717407138e-06,
|
| 349 |
+
"loss": 0.06818977355957032,
|
| 350 |
+
"step": 1075
|
| 351 |
+
},
|
| 352 |
+
{
|
| 353 |
+
"epoch": 3.6065573770491803,
|
| 354 |
+
"grad_norm": 0.2655308246612549,
|
| 355 |
+
"learning_rate": 6.2053896576839045e-06,
|
| 356 |
+
"loss": 0.07051475524902344,
|
| 357 |
+
"step": 1100
|
| 358 |
+
},
|
| 359 |
+
{
|
| 360 |
+
"epoch": 3.6885245901639343,
|
| 361 |
+
"grad_norm": 0.05852988734841347,
|
| 362 |
+
"learning_rate": 5.84122359796067e-06,
|
| 363 |
+
"loss": 0.08089996337890625,
|
| 364 |
+
"step": 1125
|
| 365 |
+
},
|
| 366 |
+
{
|
| 367 |
+
"epoch": 3.7704918032786887,
|
| 368 |
+
"grad_norm": 13.798267364501953,
|
| 369 |
+
"learning_rate": 5.477057538237437e-06,
|
| 370 |
+
"loss": 0.0827936840057373,
|
| 371 |
+
"step": 1150
|
| 372 |
+
},
|
| 373 |
+
{
|
| 374 |
+
"epoch": 3.8524590163934427,
|
| 375 |
+
"grad_norm": 6.363399982452393,
|
| 376 |
+
"learning_rate": 5.112891478514203e-06,
|
| 377 |
+
"loss": 0.06787658214569092,
|
| 378 |
+
"step": 1175
|
| 379 |
+
},
|
| 380 |
+
{
|
| 381 |
+
"epoch": 3.9344262295081966,
|
| 382 |
+
"grad_norm": 0.059008605778217316,
|
| 383 |
+
"learning_rate": 4.748725418790969e-06,
|
| 384 |
+
"loss": 0.05120136260986328,
|
| 385 |
+
"step": 1200
|
| 386 |
+
},
|
| 387 |
+
{
|
| 388 |
+
"epoch": 4.0,
|
| 389 |
+
"eval_accuracy": 0.9662576687116564,
|
| 390 |
+
"eval_f1": 0.8,
|
| 391 |
+
"eval_loss": 0.1406129151582718,
|
| 392 |
+
"eval_precision": 0.7674418604651163,
|
| 393 |
+
"eval_recall": 0.8354430379746836,
|
| 394 |
+
"eval_roc_auc": 0.9636797566916827,
|
| 395 |
+
"eval_runtime": 3.9045,
|
| 396 |
+
"eval_samples_per_second": 250.478,
|
| 397 |
+
"eval_steps_per_second": 7.939,
|
| 398 |
+
"step": 1220
|
| 399 |
+
}
|
| 400 |
+
],
|
| 401 |
+
"logging_steps": 25,
|
| 402 |
+
"max_steps": 1525,
|
| 403 |
+
"num_input_tokens_seen": 0,
|
| 404 |
+
"num_train_epochs": 5,
|
| 405 |
+
"save_steps": 500,
|
| 406 |
+
"stateful_callbacks": {
|
| 407 |
+
"EarlyStoppingCallback": {
|
| 408 |
+
"args": {
|
| 409 |
+
"early_stopping_patience": 2,
|
| 410 |
+
"early_stopping_threshold": 0.0
|
| 411 |
+
},
|
| 412 |
+
"attributes": {
|
| 413 |
+
"early_stopping_patience_counter": 0
|
| 414 |
+
}
|
| 415 |
+
},
|
| 416 |
+
"TrainerControl": {
|
| 417 |
+
"args": {
|
| 418 |
+
"should_epoch_stop": false,
|
| 419 |
+
"should_evaluate": false,
|
| 420 |
+
"should_log": false,
|
| 421 |
+
"should_save": true,
|
| 422 |
+
"should_training_stop": false
|
| 423 |
+
},
|
| 424 |
+
"attributes": {}
|
| 425 |
+
}
|
| 426 |
+
},
|
| 427 |
+
"total_flos": 2566385233981440.0,
|
| 428 |
+
"train_batch_size": 16,
|
| 429 |
+
"trial_name": null,
|
| 430 |
+
"trial_params": null
|
| 431 |
+
}
|
transformer/checkpoint-1220/training_args.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3c60366894b25ead0379e8d97e61f1123e1ad4786f5e41a8bc70f2d7bc8901f5
|
| 3 |
+
size 5329
|
transformer/checkpoint-1525/config.json
ADDED
|
@@ -0,0 +1,39 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"add_cross_attention": false,
|
| 3 |
+
"architectures": [
|
| 4 |
+
"XLMRobertaForSequenceClassification"
|
| 5 |
+
],
|
| 6 |
+
"attention_probs_dropout_prob": 0.1,
|
| 7 |
+
"bos_token_id": 0,
|
| 8 |
+
"classifier_dropout": null,
|
| 9 |
+
"dtype": "float32",
|
| 10 |
+
"eos_token_id": 2,
|
| 11 |
+
"hidden_act": "gelu",
|
| 12 |
+
"hidden_dropout_prob": 0.1,
|
| 13 |
+
"hidden_size": 768,
|
| 14 |
+
"id2label": {
|
| 15 |
+
"0": "NOT_RELEVANT",
|
| 16 |
+
"1": "RELEVANT"
|
| 17 |
+
},
|
| 18 |
+
"initializer_range": 0.02,
|
| 19 |
+
"intermediate_size": 3072,
|
| 20 |
+
"is_decoder": false,
|
| 21 |
+
"label2id": {
|
| 22 |
+
"NOT_RELEVANT": 0,
|
| 23 |
+
"RELEVANT": 1
|
| 24 |
+
},
|
| 25 |
+
"layer_norm_eps": 1e-05,
|
| 26 |
+
"max_position_embeddings": 514,
|
| 27 |
+
"model_type": "xlm-roberta",
|
| 28 |
+
"num_attention_heads": 12,
|
| 29 |
+
"num_hidden_layers": 12,
|
| 30 |
+
"output_past": true,
|
| 31 |
+
"pad_token_id": 1,
|
| 32 |
+
"position_embedding_type": "absolute",
|
| 33 |
+
"problem_type": "single_label_classification",
|
| 34 |
+
"tie_word_embeddings": true,
|
| 35 |
+
"transformers_version": "5.9.0",
|
| 36 |
+
"type_vocab_size": 1,
|
| 37 |
+
"use_cache": false,
|
| 38 |
+
"vocab_size": 250002
|
| 39 |
+
}
|
transformer/checkpoint-1525/model.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8f09e4daa3612b0a08afd0486aa5ce25a384c7e9c3abe05df4b6d1f68e5033b8
|
| 3 |
+
size 1112205008
|
transformer/checkpoint-1525/optimizer.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a0ce2fdbe5bad58a74bc9f05881c3817b46567f474f1c364e65e73b116534a13
|
| 3 |
+
size 2224532875
|
transformer/checkpoint-1525/rng_state.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:11cfcc5402da77a6739cb03bdfb1bcf72d52f973cae361ace637001e6cf3b966
|
| 3 |
+
size 14645
|
transformer/checkpoint-1525/scaler.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:cbcaa50b1076392191a1f8665b7b77d819d3cc73803b90b8509f590e4b16af02
|
| 3 |
+
size 1383
|
transformer/checkpoint-1525/scheduler.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:93a1789096427e63a18497d8b158869d7f1d9fb6b44e74fef154d50a0e1bd1e0
|
| 3 |
+
size 1465
|
transformer/checkpoint-1525/tokenizer.json
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:cc02d42fb2a10276563109e2287cc0dbe6b595d5b3b3401c7cfeffc0b7e20270
|
| 3 |
+
size 17098351
|
transformer/checkpoint-1525/tokenizer_config.json
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"add_prefix_space": true,
|
| 3 |
+
"backend": "tokenizers",
|
| 4 |
+
"bos_token": "<s>",
|
| 5 |
+
"cls_token": "<s>",
|
| 6 |
+
"eos_token": "</s>",
|
| 7 |
+
"is_local": false,
|
| 8 |
+
"local_files_only": false,
|
| 9 |
+
"mask_token": "<mask>",
|
| 10 |
+
"model_max_length": 512,
|
| 11 |
+
"pad_token": "<pad>",
|
| 12 |
+
"sep_token": "</s>",
|
| 13 |
+
"tokenizer_class": "XLMRobertaTokenizer",
|
| 14 |
+
"unk_token": "<unk>"
|
| 15 |
+
}
|
transformer/checkpoint-1525/trainer_state.json
ADDED
|
@@ -0,0 +1,535 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"best_global_step": 1525,
|
| 3 |
+
"best_metric": 0.8072289156626506,
|
| 4 |
+
"best_model_checkpoint": "/content/agri-utilization-classifier/transformer/checkpoint-1525",
|
| 5 |
+
"epoch": 5.0,
|
| 6 |
+
"eval_steps": 500,
|
| 7 |
+
"global_step": 1525,
|
| 8 |
+
"is_hyper_param_search": false,
|
| 9 |
+
"is_local_process_zero": true,
|
| 10 |
+
"is_world_process_zero": true,
|
| 11 |
+
"log_history": [
|
| 12 |
+
{
|
| 13 |
+
"epoch": 0.08196721311475409,
|
| 14 |
+
"grad_norm": 6.984184741973877,
|
| 15 |
+
"learning_rate": 3.157894736842105e-06,
|
| 16 |
+
"loss": 0.7167730712890625,
|
| 17 |
+
"step": 25
|
| 18 |
+
},
|
| 19 |
+
{
|
| 20 |
+
"epoch": 0.16393442622950818,
|
| 21 |
+
"grad_norm": 9.77598762512207,
|
| 22 |
+
"learning_rate": 6.447368421052632e-06,
|
| 23 |
+
"loss": 0.5636273193359375,
|
| 24 |
+
"step": 50
|
| 25 |
+
},
|
| 26 |
+
{
|
| 27 |
+
"epoch": 0.2459016393442623,
|
| 28 |
+
"grad_norm": 8.441609382629395,
|
| 29 |
+
"learning_rate": 9.736842105263159e-06,
|
| 30 |
+
"loss": 0.37406421661376954,
|
| 31 |
+
"step": 75
|
| 32 |
+
},
|
| 33 |
+
{
|
| 34 |
+
"epoch": 0.32786885245901637,
|
| 35 |
+
"grad_norm": 5.313694953918457,
|
| 36 |
+
"learning_rate": 1.3026315789473684e-05,
|
| 37 |
+
"loss": 0.2965927886962891,
|
| 38 |
+
"step": 100
|
| 39 |
+
},
|
| 40 |
+
{
|
| 41 |
+
"epoch": 0.4098360655737705,
|
| 42 |
+
"grad_norm": 7.240467548370361,
|
| 43 |
+
"learning_rate": 1.6315789473684213e-05,
|
| 44 |
+
"loss": 0.29742313385009767,
|
| 45 |
+
"step": 125
|
| 46 |
+
},
|
| 47 |
+
{
|
| 48 |
+
"epoch": 0.4918032786885246,
|
| 49 |
+
"grad_norm": 6.100603103637695,
|
| 50 |
+
"learning_rate": 1.960526315789474e-05,
|
| 51 |
+
"loss": 0.2068590545654297,
|
| 52 |
+
"step": 150
|
| 53 |
+
},
|
| 54 |
+
{
|
| 55 |
+
"epoch": 0.5737704918032787,
|
| 56 |
+
"grad_norm": 4.169040679931641,
|
| 57 |
+
"learning_rate": 1.9679533867443555e-05,
|
| 58 |
+
"loss": 0.21712726593017578,
|
| 59 |
+
"step": 175
|
| 60 |
+
},
|
| 61 |
+
{
|
| 62 |
+
"epoch": 0.6557377049180327,
|
| 63 |
+
"grad_norm": 4.665876865386963,
|
| 64 |
+
"learning_rate": 1.9315367807720323e-05,
|
| 65 |
+
"loss": 0.2889243125915527,
|
| 66 |
+
"step": 200
|
| 67 |
+
},
|
| 68 |
+
{
|
| 69 |
+
"epoch": 0.7377049180327869,
|
| 70 |
+
"grad_norm": 1.094870924949646,
|
| 71 |
+
"learning_rate": 1.8951201747997088e-05,
|
| 72 |
+
"loss": 0.2303921699523926,
|
| 73 |
+
"step": 225
|
| 74 |
+
},
|
| 75 |
+
{
|
| 76 |
+
"epoch": 0.819672131147541,
|
| 77 |
+
"grad_norm": 1.2164329290390015,
|
| 78 |
+
"learning_rate": 1.8587035688273852e-05,
|
| 79 |
+
"loss": 0.16723501205444335,
|
| 80 |
+
"step": 250
|
| 81 |
+
},
|
| 82 |
+
{
|
| 83 |
+
"epoch": 0.9016393442622951,
|
| 84 |
+
"grad_norm": 2.6314468383789062,
|
| 85 |
+
"learning_rate": 1.822286962855062e-05,
|
| 86 |
+
"loss": 0.15685997009277344,
|
| 87 |
+
"step": 275
|
| 88 |
+
},
|
| 89 |
+
{
|
| 90 |
+
"epoch": 0.9836065573770492,
|
| 91 |
+
"grad_norm": 14.927972793579102,
|
| 92 |
+
"learning_rate": 1.7858703568827385e-05,
|
| 93 |
+
"loss": 0.1979808807373047,
|
| 94 |
+
"step": 300
|
| 95 |
+
},
|
| 96 |
+
{
|
| 97 |
+
"epoch": 1.0,
|
| 98 |
+
"eval_accuracy": 0.9642126789366053,
|
| 99 |
+
"eval_f1": 0.7852760736196319,
|
| 100 |
+
"eval_loss": 0.10679091513156891,
|
| 101 |
+
"eval_precision": 0.7619047619047619,
|
| 102 |
+
"eval_recall": 0.810126582278481,
|
| 103 |
+
"eval_roc_auc": 0.9606665634108222,
|
| 104 |
+
"eval_runtime": 3.9998,
|
| 105 |
+
"eval_samples_per_second": 244.514,
|
| 106 |
+
"eval_steps_per_second": 7.75,
|
| 107 |
+
"step": 305
|
| 108 |
+
},
|
| 109 |
+
{
|
| 110 |
+
"epoch": 1.0655737704918034,
|
| 111 |
+
"grad_norm": 22.521757125854492,
|
| 112 |
+
"learning_rate": 1.7494537509104153e-05,
|
| 113 |
+
"loss": 0.1316550636291504,
|
| 114 |
+
"step": 325
|
| 115 |
+
},
|
| 116 |
+
{
|
| 117 |
+
"epoch": 1.1475409836065573,
|
| 118 |
+
"grad_norm": 0.22687062621116638,
|
| 119 |
+
"learning_rate": 1.7130371449380918e-05,
|
| 120 |
+
"loss": 0.2059168815612793,
|
| 121 |
+
"step": 350
|
| 122 |
+
},
|
| 123 |
+
{
|
| 124 |
+
"epoch": 1.2295081967213115,
|
| 125 |
+
"grad_norm": 0.657261312007904,
|
| 126 |
+
"learning_rate": 1.6766205389657686e-05,
|
| 127 |
+
"loss": 0.09930330276489258,
|
| 128 |
+
"step": 375
|
| 129 |
+
},
|
| 130 |
+
{
|
| 131 |
+
"epoch": 1.3114754098360657,
|
| 132 |
+
"grad_norm": 4.796896457672119,
|
| 133 |
+
"learning_rate": 1.640203932993445e-05,
|
| 134 |
+
"loss": 0.13251757621765137,
|
| 135 |
+
"step": 400
|
| 136 |
+
},
|
| 137 |
+
{
|
| 138 |
+
"epoch": 1.3934426229508197,
|
| 139 |
+
"grad_norm": 0.5394258499145508,
|
| 140 |
+
"learning_rate": 1.603787327021122e-05,
|
| 141 |
+
"loss": 0.17834033966064453,
|
| 142 |
+
"step": 425
|
| 143 |
+
},
|
| 144 |
+
{
|
| 145 |
+
"epoch": 1.4754098360655736,
|
| 146 |
+
"grad_norm": 0.4655781388282776,
|
| 147 |
+
"learning_rate": 1.5673707210487983e-05,
|
| 148 |
+
"loss": 0.122637300491333,
|
| 149 |
+
"step": 450
|
| 150 |
+
},
|
| 151 |
+
{
|
| 152 |
+
"epoch": 1.5573770491803278,
|
| 153 |
+
"grad_norm": 0.42849695682525635,
|
| 154 |
+
"learning_rate": 1.530954115076475e-05,
|
| 155 |
+
"loss": 0.15983641624450684,
|
| 156 |
+
"step": 475
|
| 157 |
+
},
|
| 158 |
+
{
|
| 159 |
+
"epoch": 1.639344262295082,
|
| 160 |
+
"grad_norm": 0.45386794209480286,
|
| 161 |
+
"learning_rate": 1.4945375091041516e-05,
|
| 162 |
+
"loss": 0.14264726638793945,
|
| 163 |
+
"step": 500
|
| 164 |
+
},
|
| 165 |
+
{
|
| 166 |
+
"epoch": 1.721311475409836,
|
| 167 |
+
"grad_norm": 0.6170782446861267,
|
| 168 |
+
"learning_rate": 1.4581209031318282e-05,
|
| 169 |
+
"loss": 0.18886091232299804,
|
| 170 |
+
"step": 525
|
| 171 |
+
},
|
| 172 |
+
{
|
| 173 |
+
"epoch": 1.8032786885245902,
|
| 174 |
+
"grad_norm": 5.4546122550964355,
|
| 175 |
+
"learning_rate": 1.4217042971595047e-05,
|
| 176 |
+
"loss": 0.14393989562988282,
|
| 177 |
+
"step": 550
|
| 178 |
+
},
|
| 179 |
+
{
|
| 180 |
+
"epoch": 1.8852459016393444,
|
| 181 |
+
"grad_norm": 17.419189453125,
|
| 182 |
+
"learning_rate": 1.3852876911871815e-05,
|
| 183 |
+
"loss": 0.10272212982177735,
|
| 184 |
+
"step": 575
|
| 185 |
+
},
|
| 186 |
+
{
|
| 187 |
+
"epoch": 1.9672131147540983,
|
| 188 |
+
"grad_norm": 1.6267497539520264,
|
| 189 |
+
"learning_rate": 1.3488710852148582e-05,
|
| 190 |
+
"loss": 0.1810975456237793,
|
| 191 |
+
"step": 600
|
| 192 |
+
},
|
| 193 |
+
{
|
| 194 |
+
"epoch": 2.0,
|
| 195 |
+
"eval_accuracy": 0.9580777096114519,
|
| 196 |
+
"eval_f1": 0.7759562841530054,
|
| 197 |
+
"eval_loss": 0.17126062512397766,
|
| 198 |
+
"eval_precision": 0.6826923076923077,
|
| 199 |
+
"eval_recall": 0.8987341772151899,
|
| 200 |
+
"eval_roc_auc": 0.9631939848777121,
|
| 201 |
+
"eval_runtime": 3.84,
|
| 202 |
+
"eval_samples_per_second": 254.687,
|
| 203 |
+
"eval_steps_per_second": 8.073,
|
| 204 |
+
"step": 610
|
| 205 |
+
},
|
| 206 |
+
{
|
| 207 |
+
"epoch": 2.0491803278688523,
|
| 208 |
+
"grad_norm": 1.3180276155471802,
|
| 209 |
+
"learning_rate": 1.3124544792425346e-05,
|
| 210 |
+
"loss": 0.05921304225921631,
|
| 211 |
+
"step": 625
|
| 212 |
+
},
|
| 213 |
+
{
|
| 214 |
+
"epoch": 2.1311475409836067,
|
| 215 |
+
"grad_norm": 5.675038814544678,
|
| 216 |
+
"learning_rate": 1.2760378732702113e-05,
|
| 217 |
+
"loss": 0.16824769973754883,
|
| 218 |
+
"step": 650
|
| 219 |
+
},
|
| 220 |
+
{
|
| 221 |
+
"epoch": 2.2131147540983607,
|
| 222 |
+
"grad_norm": 0.16993092000484467,
|
| 223 |
+
"learning_rate": 1.239621267297888e-05,
|
| 224 |
+
"loss": 0.12186273574829101,
|
| 225 |
+
"step": 675
|
| 226 |
+
},
|
| 227 |
+
{
|
| 228 |
+
"epoch": 2.2950819672131146,
|
| 229 |
+
"grad_norm": 1.1791695356369019,
|
| 230 |
+
"learning_rate": 1.2032046613255645e-05,
|
| 231 |
+
"loss": 0.08795836448669433,
|
| 232 |
+
"step": 700
|
| 233 |
+
},
|
| 234 |
+
{
|
| 235 |
+
"epoch": 2.3770491803278686,
|
| 236 |
+
"grad_norm": 0.07541065663099289,
|
| 237 |
+
"learning_rate": 1.1667880553532412e-05,
|
| 238 |
+
"loss": 0.10129087448120117,
|
| 239 |
+
"step": 725
|
| 240 |
+
},
|
| 241 |
+
{
|
| 242 |
+
"epoch": 2.459016393442623,
|
| 243 |
+
"grad_norm": 3.394912004470825,
|
| 244 |
+
"learning_rate": 1.1303714493809176e-05,
|
| 245 |
+
"loss": 0.14056243896484374,
|
| 246 |
+
"step": 750
|
| 247 |
+
},
|
| 248 |
+
{
|
| 249 |
+
"epoch": 2.540983606557377,
|
| 250 |
+
"grad_norm": 8.074258804321289,
|
| 251 |
+
"learning_rate": 1.0939548434085944e-05,
|
| 252 |
+
"loss": 0.06563093185424805,
|
| 253 |
+
"step": 775
|
| 254 |
+
},
|
| 255 |
+
{
|
| 256 |
+
"epoch": 2.6229508196721314,
|
| 257 |
+
"grad_norm": 12.472029685974121,
|
| 258 |
+
"learning_rate": 1.057538237436271e-05,
|
| 259 |
+
"loss": 0.09851057052612305,
|
| 260 |
+
"step": 800
|
| 261 |
+
},
|
| 262 |
+
{
|
| 263 |
+
"epoch": 2.7049180327868854,
|
| 264 |
+
"grad_norm": 0.10368915647268295,
|
| 265 |
+
"learning_rate": 1.0211216314639475e-05,
|
| 266 |
+
"loss": 0.11658324241638184,
|
| 267 |
+
"step": 825
|
| 268 |
+
},
|
| 269 |
+
{
|
| 270 |
+
"epoch": 2.7868852459016393,
|
| 271 |
+
"grad_norm": 44.263092041015625,
|
| 272 |
+
"learning_rate": 9.847050254916243e-06,
|
| 273 |
+
"loss": 0.13634946823120117,
|
| 274 |
+
"step": 850
|
| 275 |
+
},
|
| 276 |
+
{
|
| 277 |
+
"epoch": 2.8688524590163933,
|
| 278 |
+
"grad_norm": 0.07709958404302597,
|
| 279 |
+
"learning_rate": 9.482884195193008e-06,
|
| 280 |
+
"loss": 0.12144805908203125,
|
| 281 |
+
"step": 875
|
| 282 |
+
},
|
| 283 |
+
{
|
| 284 |
+
"epoch": 2.9508196721311473,
|
| 285 |
+
"grad_norm": 0.11255892366170883,
|
| 286 |
+
"learning_rate": 9.118718135469774e-06,
|
| 287 |
+
"loss": 0.11815821647644043,
|
| 288 |
+
"step": 900
|
| 289 |
+
},
|
| 290 |
+
{
|
| 291 |
+
"epoch": 3.0,
|
| 292 |
+
"eval_accuracy": 0.9601226993865031,
|
| 293 |
+
"eval_f1": 0.7868852459016393,
|
| 294 |
+
"eval_loss": 0.14711864292621613,
|
| 295 |
+
"eval_precision": 0.6923076923076923,
|
| 296 |
+
"eval_recall": 0.9113924050632911,
|
| 297 |
+
"eval_roc_auc": 0.9719449177003984,
|
| 298 |
+
"eval_runtime": 3.9043,
|
| 299 |
+
"eval_samples_per_second": 250.495,
|
| 300 |
+
"eval_steps_per_second": 7.94,
|
| 301 |
+
"step": 915
|
| 302 |
+
},
|
| 303 |
+
{
|
| 304 |
+
"epoch": 3.0327868852459017,
|
| 305 |
+
"grad_norm": 0.2808685302734375,
|
| 306 |
+
"learning_rate": 8.754552075746541e-06,
|
| 307 |
+
"loss": 0.1051255989074707,
|
| 308 |
+
"step": 925
|
| 309 |
+
},
|
| 310 |
+
{
|
| 311 |
+
"epoch": 3.1147540983606556,
|
| 312 |
+
"grad_norm": 0.07298991084098816,
|
| 313 |
+
"learning_rate": 8.390386016023307e-06,
|
| 314 |
+
"loss": 0.08817357063293457,
|
| 315 |
+
"step": 950
|
| 316 |
+
},
|
| 317 |
+
{
|
| 318 |
+
"epoch": 3.19672131147541,
|
| 319 |
+
"grad_norm": 0.049921419471502304,
|
| 320 |
+
"learning_rate": 8.026219956300074e-06,
|
| 321 |
+
"loss": 0.11110530853271484,
|
| 322 |
+
"step": 975
|
| 323 |
+
},
|
| 324 |
+
{
|
| 325 |
+
"epoch": 3.278688524590164,
|
| 326 |
+
"grad_norm": 1.874350905418396,
|
| 327 |
+
"learning_rate": 7.66205389657684e-06,
|
| 328 |
+
"loss": 0.09003183364868164,
|
| 329 |
+
"step": 1000
|
| 330 |
+
},
|
| 331 |
+
{
|
| 332 |
+
"epoch": 3.360655737704918,
|
| 333 |
+
"grad_norm": 0.09576287865638733,
|
| 334 |
+
"learning_rate": 7.2978878368536055e-06,
|
| 335 |
+
"loss": 0.05897871017456055,
|
| 336 |
+
"step": 1025
|
| 337 |
+
},
|
| 338 |
+
{
|
| 339 |
+
"epoch": 3.442622950819672,
|
| 340 |
+
"grad_norm": 20.84284019470215,
|
| 341 |
+
"learning_rate": 6.933721777130372e-06,
|
| 342 |
+
"loss": 0.06021720886230469,
|
| 343 |
+
"step": 1050
|
| 344 |
+
},
|
| 345 |
+
{
|
| 346 |
+
"epoch": 3.5245901639344264,
|
| 347 |
+
"grad_norm": 0.06452233344316483,
|
| 348 |
+
"learning_rate": 6.569555717407138e-06,
|
| 349 |
+
"loss": 0.06818977355957032,
|
| 350 |
+
"step": 1075
|
| 351 |
+
},
|
| 352 |
+
{
|
| 353 |
+
"epoch": 3.6065573770491803,
|
| 354 |
+
"grad_norm": 0.2655308246612549,
|
| 355 |
+
"learning_rate": 6.2053896576839045e-06,
|
| 356 |
+
"loss": 0.07051475524902344,
|
| 357 |
+
"step": 1100
|
| 358 |
+
},
|
| 359 |
+
{
|
| 360 |
+
"epoch": 3.6885245901639343,
|
| 361 |
+
"grad_norm": 0.05852988734841347,
|
| 362 |
+
"learning_rate": 5.84122359796067e-06,
|
| 363 |
+
"loss": 0.08089996337890625,
|
| 364 |
+
"step": 1125
|
| 365 |
+
},
|
| 366 |
+
{
|
| 367 |
+
"epoch": 3.7704918032786887,
|
| 368 |
+
"grad_norm": 13.798267364501953,
|
| 369 |
+
"learning_rate": 5.477057538237437e-06,
|
| 370 |
+
"loss": 0.0827936840057373,
|
| 371 |
+
"step": 1150
|
| 372 |
+
},
|
| 373 |
+
{
|
| 374 |
+
"epoch": 3.8524590163934427,
|
| 375 |
+
"grad_norm": 6.363399982452393,
|
| 376 |
+
"learning_rate": 5.112891478514203e-06,
|
| 377 |
+
"loss": 0.06787658214569092,
|
| 378 |
+
"step": 1175
|
| 379 |
+
},
|
| 380 |
+
{
|
| 381 |
+
"epoch": 3.9344262295081966,
|
| 382 |
+
"grad_norm": 0.059008605778217316,
|
| 383 |
+
"learning_rate": 4.748725418790969e-06,
|
| 384 |
+
"loss": 0.05120136260986328,
|
| 385 |
+
"step": 1200
|
| 386 |
+
},
|
| 387 |
+
{
|
| 388 |
+
"epoch": 4.0,
|
| 389 |
+
"eval_accuracy": 0.9662576687116564,
|
| 390 |
+
"eval_f1": 0.8,
|
| 391 |
+
"eval_loss": 0.1406129151582718,
|
| 392 |
+
"eval_precision": 0.7674418604651163,
|
| 393 |
+
"eval_recall": 0.8354430379746836,
|
| 394 |
+
"eval_roc_auc": 0.9636797566916827,
|
| 395 |
+
"eval_runtime": 3.9045,
|
| 396 |
+
"eval_samples_per_second": 250.478,
|
| 397 |
+
"eval_steps_per_second": 7.939,
|
| 398 |
+
"step": 1220
|
| 399 |
+
},
|
| 400 |
+
{
|
| 401 |
+
"epoch": 4.016393442622951,
|
| 402 |
+
"grad_norm": 7.579391002655029,
|
| 403 |
+
"learning_rate": 4.3845593590677355e-06,
|
| 404 |
+
"loss": 0.07703603267669677,
|
| 405 |
+
"step": 1225
|
| 406 |
+
},
|
| 407 |
+
{
|
| 408 |
+
"epoch": 4.098360655737705,
|
| 409 |
+
"grad_norm": 2.102841377258301,
|
| 410 |
+
"learning_rate": 4.020393299344502e-06,
|
| 411 |
+
"loss": 0.032845423221588135,
|
| 412 |
+
"step": 1250
|
| 413 |
+
},
|
| 414 |
+
{
|
| 415 |
+
"epoch": 4.180327868852459,
|
| 416 |
+
"grad_norm": 0.07024268805980682,
|
| 417 |
+
"learning_rate": 3.656227239621268e-06,
|
| 418 |
+
"loss": 0.03449820995330811,
|
| 419 |
+
"step": 1275
|
| 420 |
+
},
|
| 421 |
+
{
|
| 422 |
+
"epoch": 4.262295081967213,
|
| 423 |
+
"grad_norm": 0.02977728098630905,
|
| 424 |
+
"learning_rate": 3.292061179898034e-06,
|
| 425 |
+
"loss": 0.06648642539978028,
|
| 426 |
+
"step": 1300
|
| 427 |
+
},
|
| 428 |
+
{
|
| 429 |
+
"epoch": 4.344262295081967,
|
| 430 |
+
"grad_norm": 0.02086547203361988,
|
| 431 |
+
"learning_rate": 2.9278951201748e-06,
|
| 432 |
+
"loss": 0.06277695178985596,
|
| 433 |
+
"step": 1325
|
| 434 |
+
},
|
| 435 |
+
{
|
| 436 |
+
"epoch": 4.426229508196721,
|
| 437 |
+
"grad_norm": 0.15774419903755188,
|
| 438 |
+
"learning_rate": 2.5637290604515665e-06,
|
| 439 |
+
"loss": 0.02380265951156616,
|
| 440 |
+
"step": 1350
|
| 441 |
+
},
|
| 442 |
+
{
|
| 443 |
+
"epoch": 4.508196721311475,
|
| 444 |
+
"grad_norm": 0.030775833874940872,
|
| 445 |
+
"learning_rate": 2.1995630007283324e-06,
|
| 446 |
+
"loss": 0.06929955959320068,
|
| 447 |
+
"step": 1375
|
| 448 |
+
},
|
| 449 |
+
{
|
| 450 |
+
"epoch": 4.590163934426229,
|
| 451 |
+
"grad_norm": 0.07542883604764938,
|
| 452 |
+
"learning_rate": 1.8353969410050983e-06,
|
| 453 |
+
"loss": 0.027865142822265626,
|
| 454 |
+
"step": 1400
|
| 455 |
+
},
|
| 456 |
+
{
|
| 457 |
+
"epoch": 4.672131147540983,
|
| 458 |
+
"grad_norm": 6.874780178070068,
|
| 459 |
+
"learning_rate": 1.4712308812818645e-06,
|
| 460 |
+
"loss": 0.0548116397857666,
|
| 461 |
+
"step": 1425
|
| 462 |
+
},
|
| 463 |
+
{
|
| 464 |
+
"epoch": 4.754098360655737,
|
| 465 |
+
"grad_norm": 37.22102355957031,
|
| 466 |
+
"learning_rate": 1.1070648215586309e-06,
|
| 467 |
+
"loss": 0.04357499122619629,
|
| 468 |
+
"step": 1450
|
| 469 |
+
},
|
| 470 |
+
{
|
| 471 |
+
"epoch": 4.836065573770492,
|
| 472 |
+
"grad_norm": 0.03843735158443451,
|
| 473 |
+
"learning_rate": 7.428987618353969e-07,
|
| 474 |
+
"loss": 0.03130460500717163,
|
| 475 |
+
"step": 1475
|
| 476 |
+
},
|
| 477 |
+
{
|
| 478 |
+
"epoch": 4.918032786885246,
|
| 479 |
+
"grad_norm": 0.028637070208787918,
|
| 480 |
+
"learning_rate": 3.787327021121632e-07,
|
| 481 |
+
"loss": 0.02157698631286621,
|
| 482 |
+
"step": 1500
|
| 483 |
+
},
|
| 484 |
+
{
|
| 485 |
+
"epoch": 5.0,
|
| 486 |
+
"grad_norm": 0.021514365449547768,
|
| 487 |
+
"learning_rate": 1.4566642388929353e-08,
|
| 488 |
+
"loss": 0.04632264614105225,
|
| 489 |
+
"step": 1525
|
| 490 |
+
},
|
| 491 |
+
{
|
| 492 |
+
"epoch": 5.0,
|
| 493 |
+
"eval_accuracy": 0.967280163599182,
|
| 494 |
+
"eval_f1": 0.8072289156626506,
|
| 495 |
+
"eval_loss": 0.15767407417297363,
|
| 496 |
+
"eval_precision": 0.7701149425287356,
|
| 497 |
+
"eval_recall": 0.8481012658227848,
|
| 498 |
+
"eval_roc_auc": 0.9619549147435266,
|
| 499 |
+
"eval_runtime": 3.8427,
|
| 500 |
+
"eval_samples_per_second": 254.511,
|
| 501 |
+
"eval_steps_per_second": 8.067,
|
| 502 |
+
"step": 1525
|
| 503 |
+
}
|
| 504 |
+
],
|
| 505 |
+
"logging_steps": 25,
|
| 506 |
+
"max_steps": 1525,
|
| 507 |
+
"num_input_tokens_seen": 0,
|
| 508 |
+
"num_train_epochs": 5,
|
| 509 |
+
"save_steps": 500,
|
| 510 |
+
"stateful_callbacks": {
|
| 511 |
+
"EarlyStoppingCallback": {
|
| 512 |
+
"args": {
|
| 513 |
+
"early_stopping_patience": 2,
|
| 514 |
+
"early_stopping_threshold": 0.0
|
| 515 |
+
},
|
| 516 |
+
"attributes": {
|
| 517 |
+
"early_stopping_patience_counter": 0
|
| 518 |
+
}
|
| 519 |
+
},
|
| 520 |
+
"TrainerControl": {
|
| 521 |
+
"args": {
|
| 522 |
+
"should_epoch_stop": false,
|
| 523 |
+
"should_evaluate": false,
|
| 524 |
+
"should_log": false,
|
| 525 |
+
"should_save": true,
|
| 526 |
+
"should_training_stop": true
|
| 527 |
+
},
|
| 528 |
+
"attributes": {}
|
| 529 |
+
}
|
| 530 |
+
},
|
| 531 |
+
"total_flos": 3207981542476800.0,
|
| 532 |
+
"train_batch_size": 16,
|
| 533 |
+
"trial_name": null,
|
| 534 |
+
"trial_params": null
|
| 535 |
+
}
|
transformer/checkpoint-1525/training_args.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3c60366894b25ead0379e8d97e61f1123e1ad4786f5e41a8bc70f2d7bc8901f5
|
| 3 |
+
size 5329
|
transformer/checkpoint-305/config.json
ADDED
|
@@ -0,0 +1,39 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"add_cross_attention": false,
|
| 3 |
+
"architectures": [
|
| 4 |
+
"XLMRobertaForSequenceClassification"
|
| 5 |
+
],
|
| 6 |
+
"attention_probs_dropout_prob": 0.1,
|
| 7 |
+
"bos_token_id": 0,
|
| 8 |
+
"classifier_dropout": null,
|
| 9 |
+
"dtype": "float32",
|
| 10 |
+
"eos_token_id": 2,
|
| 11 |
+
"hidden_act": "gelu",
|
| 12 |
+
"hidden_dropout_prob": 0.1,
|
| 13 |
+
"hidden_size": 768,
|
| 14 |
+
"id2label": {
|
| 15 |
+
"0": "NOT_RELEVANT",
|
| 16 |
+
"1": "RELEVANT"
|
| 17 |
+
},
|
| 18 |
+
"initializer_range": 0.02,
|
| 19 |
+
"intermediate_size": 3072,
|
| 20 |
+
"is_decoder": false,
|
| 21 |
+
"label2id": {
|
| 22 |
+
"NOT_RELEVANT": 0,
|
| 23 |
+
"RELEVANT": 1
|
| 24 |
+
},
|
| 25 |
+
"layer_norm_eps": 1e-05,
|
| 26 |
+
"max_position_embeddings": 514,
|
| 27 |
+
"model_type": "xlm-roberta",
|
| 28 |
+
"num_attention_heads": 12,
|
| 29 |
+
"num_hidden_layers": 12,
|
| 30 |
+
"output_past": true,
|
| 31 |
+
"pad_token_id": 1,
|
| 32 |
+
"position_embedding_type": "absolute",
|
| 33 |
+
"problem_type": "single_label_classification",
|
| 34 |
+
"tie_word_embeddings": true,
|
| 35 |
+
"transformers_version": "5.9.0",
|
| 36 |
+
"type_vocab_size": 1,
|
| 37 |
+
"use_cache": false,
|
| 38 |
+
"vocab_size": 250002
|
| 39 |
+
}
|
transformer/checkpoint-305/model.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:bb20e73a65d9daab2db9b645e0bd0608878ef7b24edb7ac88b5558863e0c5ecc
|
| 3 |
+
size 1112205008
|
transformer/checkpoint-305/optimizer.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:48f419be6f85e4a9a74197d6249f1efe7127ac7668e97007ff458f4b5034cff4
|
| 3 |
+
size 2224532875
|
transformer/checkpoint-305/rng_state.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:402debc68e858d5bda7b8fed48fe7886888add874790e6471baac63ac8bdc564
|
| 3 |
+
size 14645
|
transformer/checkpoint-305/scaler.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9b479a091e6482cdb8085a5931fe45bae60997ab8939a20b371d61d6be6f7199
|
| 3 |
+
size 1383
|
transformer/checkpoint-305/scheduler.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8dcb4d91eb3f8c20bc70fefd93bf7ba0111a76badc2faa81080aeb911ab9bd59
|
| 3 |
+
size 1465
|
transformer/checkpoint-305/tokenizer.json
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:cc02d42fb2a10276563109e2287cc0dbe6b595d5b3b3401c7cfeffc0b7e20270
|
| 3 |
+
size 17098351
|
transformer/checkpoint-305/tokenizer_config.json
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"add_prefix_space": true,
|
| 3 |
+
"backend": "tokenizers",
|
| 4 |
+
"bos_token": "<s>",
|
| 5 |
+
"cls_token": "<s>",
|
| 6 |
+
"eos_token": "</s>",
|
| 7 |
+
"is_local": false,
|
| 8 |
+
"local_files_only": false,
|
| 9 |
+
"mask_token": "<mask>",
|
| 10 |
+
"model_max_length": 512,
|
| 11 |
+
"pad_token": "<pad>",
|
| 12 |
+
"sep_token": "</s>",
|
| 13 |
+
"tokenizer_class": "XLMRobertaTokenizer",
|
| 14 |
+
"unk_token": "<unk>"
|
| 15 |
+
}
|
transformer/checkpoint-305/trainer_state.json
ADDED
|
@@ -0,0 +1,140 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"best_global_step": 305,
|
| 3 |
+
"best_metric": 0.7852760736196319,
|
| 4 |
+
"best_model_checkpoint": "/content/agri-utilization-classifier/transformer/checkpoint-305",
|
| 5 |
+
"epoch": 1.0,
|
| 6 |
+
"eval_steps": 500,
|
| 7 |
+
"global_step": 305,
|
| 8 |
+
"is_hyper_param_search": false,
|
| 9 |
+
"is_local_process_zero": true,
|
| 10 |
+
"is_world_process_zero": true,
|
| 11 |
+
"log_history": [
|
| 12 |
+
{
|
| 13 |
+
"epoch": 0.08196721311475409,
|
| 14 |
+
"grad_norm": 6.984184741973877,
|
| 15 |
+
"learning_rate": 3.157894736842105e-06,
|
| 16 |
+
"loss": 0.7167730712890625,
|
| 17 |
+
"step": 25
|
| 18 |
+
},
|
| 19 |
+
{
|
| 20 |
+
"epoch": 0.16393442622950818,
|
| 21 |
+
"grad_norm": 9.77598762512207,
|
| 22 |
+
"learning_rate": 6.447368421052632e-06,
|
| 23 |
+
"loss": 0.5636273193359375,
|
| 24 |
+
"step": 50
|
| 25 |
+
},
|
| 26 |
+
{
|
| 27 |
+
"epoch": 0.2459016393442623,
|
| 28 |
+
"grad_norm": 8.441609382629395,
|
| 29 |
+
"learning_rate": 9.736842105263159e-06,
|
| 30 |
+
"loss": 0.37406421661376954,
|
| 31 |
+
"step": 75
|
| 32 |
+
},
|
| 33 |
+
{
|
| 34 |
+
"epoch": 0.32786885245901637,
|
| 35 |
+
"grad_norm": 5.313694953918457,
|
| 36 |
+
"learning_rate": 1.3026315789473684e-05,
|
| 37 |
+
"loss": 0.2965927886962891,
|
| 38 |
+
"step": 100
|
| 39 |
+
},
|
| 40 |
+
{
|
| 41 |
+
"epoch": 0.4098360655737705,
|
| 42 |
+
"grad_norm": 7.240467548370361,
|
| 43 |
+
"learning_rate": 1.6315789473684213e-05,
|
| 44 |
+
"loss": 0.29742313385009767,
|
| 45 |
+
"step": 125
|
| 46 |
+
},
|
| 47 |
+
{
|
| 48 |
+
"epoch": 0.4918032786885246,
|
| 49 |
+
"grad_norm": 6.100603103637695,
|
| 50 |
+
"learning_rate": 1.960526315789474e-05,
|
| 51 |
+
"loss": 0.2068590545654297,
|
| 52 |
+
"step": 150
|
| 53 |
+
},
|
| 54 |
+
{
|
| 55 |
+
"epoch": 0.5737704918032787,
|
| 56 |
+
"grad_norm": 4.169040679931641,
|
| 57 |
+
"learning_rate": 1.9679533867443555e-05,
|
| 58 |
+
"loss": 0.21712726593017578,
|
| 59 |
+
"step": 175
|
| 60 |
+
},
|
| 61 |
+
{
|
| 62 |
+
"epoch": 0.6557377049180327,
|
| 63 |
+
"grad_norm": 4.665876865386963,
|
| 64 |
+
"learning_rate": 1.9315367807720323e-05,
|
| 65 |
+
"loss": 0.2889243125915527,
|
| 66 |
+
"step": 200
|
| 67 |
+
},
|
| 68 |
+
{
|
| 69 |
+
"epoch": 0.7377049180327869,
|
| 70 |
+
"grad_norm": 1.094870924949646,
|
| 71 |
+
"learning_rate": 1.8951201747997088e-05,
|
| 72 |
+
"loss": 0.2303921699523926,
|
| 73 |
+
"step": 225
|
| 74 |
+
},
|
| 75 |
+
{
|
| 76 |
+
"epoch": 0.819672131147541,
|
| 77 |
+
"grad_norm": 1.2164329290390015,
|
| 78 |
+
"learning_rate": 1.8587035688273852e-05,
|
| 79 |
+
"loss": 0.16723501205444335,
|
| 80 |
+
"step": 250
|
| 81 |
+
},
|
| 82 |
+
{
|
| 83 |
+
"epoch": 0.9016393442622951,
|
| 84 |
+
"grad_norm": 2.6314468383789062,
|
| 85 |
+
"learning_rate": 1.822286962855062e-05,
|
| 86 |
+
"loss": 0.15685997009277344,
|
| 87 |
+
"step": 275
|
| 88 |
+
},
|
| 89 |
+
{
|
| 90 |
+
"epoch": 0.9836065573770492,
|
| 91 |
+
"grad_norm": 14.927972793579102,
|
| 92 |
+
"learning_rate": 1.7858703568827385e-05,
|
| 93 |
+
"loss": 0.1979808807373047,
|
| 94 |
+
"step": 300
|
| 95 |
+
},
|
| 96 |
+
{
|
| 97 |
+
"epoch": 1.0,
|
| 98 |
+
"eval_accuracy": 0.9642126789366053,
|
| 99 |
+
"eval_f1": 0.7852760736196319,
|
| 100 |
+
"eval_loss": 0.10679091513156891,
|
| 101 |
+
"eval_precision": 0.7619047619047619,
|
| 102 |
+
"eval_recall": 0.810126582278481,
|
| 103 |
+
"eval_roc_auc": 0.9606665634108222,
|
| 104 |
+
"eval_runtime": 3.9998,
|
| 105 |
+
"eval_samples_per_second": 244.514,
|
| 106 |
+
"eval_steps_per_second": 7.75,
|
| 107 |
+
"step": 305
|
| 108 |
+
}
|
| 109 |
+
],
|
| 110 |
+
"logging_steps": 25,
|
| 111 |
+
"max_steps": 1525,
|
| 112 |
+
"num_input_tokens_seen": 0,
|
| 113 |
+
"num_train_epochs": 5,
|
| 114 |
+
"save_steps": 500,
|
| 115 |
+
"stateful_callbacks": {
|
| 116 |
+
"EarlyStoppingCallback": {
|
| 117 |
+
"args": {
|
| 118 |
+
"early_stopping_patience": 2,
|
| 119 |
+
"early_stopping_threshold": 0.0
|
| 120 |
+
},
|
| 121 |
+
"attributes": {
|
| 122 |
+
"early_stopping_patience_counter": 0
|
| 123 |
+
}
|
| 124 |
+
},
|
| 125 |
+
"TrainerControl": {
|
| 126 |
+
"args": {
|
| 127 |
+
"should_epoch_stop": false,
|
| 128 |
+
"should_evaluate": false,
|
| 129 |
+
"should_log": false,
|
| 130 |
+
"should_save": true,
|
| 131 |
+
"should_training_stop": false
|
| 132 |
+
},
|
| 133 |
+
"attributes": {}
|
| 134 |
+
}
|
| 135 |
+
},
|
| 136 |
+
"total_flos": 641596308495360.0,
|
| 137 |
+
"train_batch_size": 16,
|
| 138 |
+
"trial_name": null,
|
| 139 |
+
"trial_params": null
|
| 140 |
+
}
|
transformer/checkpoint-305/training_args.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3c60366894b25ead0379e8d97e61f1123e1ad4786f5e41a8bc70f2d7bc8901f5
|
| 3 |
+
size 5329
|
transformer/checkpoint-610/config.json
ADDED
|
@@ -0,0 +1,39 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"add_cross_attention": false,
|
| 3 |
+
"architectures": [
|
| 4 |
+
"XLMRobertaForSequenceClassification"
|
| 5 |
+
],
|
| 6 |
+
"attention_probs_dropout_prob": 0.1,
|
| 7 |
+
"bos_token_id": 0,
|
| 8 |
+
"classifier_dropout": null,
|
| 9 |
+
"dtype": "float32",
|
| 10 |
+
"eos_token_id": 2,
|
| 11 |
+
"hidden_act": "gelu",
|
| 12 |
+
"hidden_dropout_prob": 0.1,
|
| 13 |
+
"hidden_size": 768,
|
| 14 |
+
"id2label": {
|
| 15 |
+
"0": "NOT_RELEVANT",
|
| 16 |
+
"1": "RELEVANT"
|
| 17 |
+
},
|
| 18 |
+
"initializer_range": 0.02,
|
| 19 |
+
"intermediate_size": 3072,
|
| 20 |
+
"is_decoder": false,
|
| 21 |
+
"label2id": {
|
| 22 |
+
"NOT_RELEVANT": 0,
|
| 23 |
+
"RELEVANT": 1
|
| 24 |
+
},
|
| 25 |
+
"layer_norm_eps": 1e-05,
|
| 26 |
+
"max_position_embeddings": 514,
|
| 27 |
+
"model_type": "xlm-roberta",
|
| 28 |
+
"num_attention_heads": 12,
|
| 29 |
+
"num_hidden_layers": 12,
|
| 30 |
+
"output_past": true,
|
| 31 |
+
"pad_token_id": 1,
|
| 32 |
+
"position_embedding_type": "absolute",
|
| 33 |
+
"problem_type": "single_label_classification",
|
| 34 |
+
"tie_word_embeddings": true,
|
| 35 |
+
"transformers_version": "5.9.0",
|
| 36 |
+
"type_vocab_size": 1,
|
| 37 |
+
"use_cache": false,
|
| 38 |
+
"vocab_size": 250002
|
| 39 |
+
}
|