lgsilvaesilva committed on
Commit
57dcd10
·
verified ·
1 Parent(s): b813f7b

Upload folder using huggingface_hub

Browse files
This view is limited to 50 files because the commit contains too many changes. See the raw diff for the complete list.
Files changed (50) hide show
  1. .gitattributes +6 -0
  2. README.md +86 -47
  3. REPORT.md +54 -34
  4. baselines/embedding-lightgbm/embedding-lightgbm.joblib +2 -2
  5. baselines/embedding-lightgbm/test_predictions.csv +0 -0
  6. baselines/embedding-lightgbm/validation_predictions.csv +0 -0
  7. baselines/embedding-logistic/embedding-logistic.joblib +2 -2
  8. baselines/embedding-logistic/test_predictions.csv +0 -0
  9. baselines/embedding-logistic/validation_predictions.csv +0 -0
  10. baselines/embedding-svm/embedding-svm.joblib +2 -2
  11. baselines/embedding-svm/test_predictions.csv +0 -0
  12. baselines/embedding-svm/validation_predictions.csv +0 -0
  13. report.json +759 -206
  14. transformer/checkpoint-1135/config.json +39 -0
  15. transformer/checkpoint-1135/model.safetensors +3 -0
  16. transformer/checkpoint-1135/optimizer.pt +3 -0
  17. transformer/checkpoint-1135/rng_state.pth +3 -0
  18. transformer/checkpoint-1135/scaler.pt +3 -0
  19. transformer/checkpoint-1135/scheduler.pt +3 -0
  20. transformer/checkpoint-1135/tokenizer.json +3 -0
  21. transformer/checkpoint-1135/tokenizer_config.json +15 -0
  22. transformer/checkpoint-1135/trainer_state.json +423 -0
  23. transformer/checkpoint-1135/training_args.bin +3 -0
  24. transformer/checkpoint-227/config.json +21 -19
  25. transformer/checkpoint-227/model.safetensors +2 -2
  26. transformer/checkpoint-227/optimizer.pt +2 -2
  27. transformer/checkpoint-227/rng_state.pth +1 -1
  28. transformer/checkpoint-227/scaler.pt +1 -1
  29. transformer/checkpoint-227/tokenizer.json +0 -0
  30. transformer/checkpoint-227/tokenizer_config.json +9 -9
  31. transformer/checkpoint-227/trainer_state.json +29 -29
  32. transformer/checkpoint-454/config.json +21 -19
  33. transformer/checkpoint-454/model.safetensors +2 -2
  34. transformer/checkpoint-454/optimizer.pt +2 -2
  35. transformer/checkpoint-454/rng_state.pth +1 -1
  36. transformer/checkpoint-454/scaler.pt +1 -1
  37. transformer/checkpoint-454/tokenizer.json +0 -0
  38. transformer/checkpoint-454/tokenizer_config.json +9 -9
  39. transformer/checkpoint-454/trainer_state.json +56 -56
  40. transformer/checkpoint-681/config.json +21 -19
  41. transformer/checkpoint-681/model.safetensors +2 -2
  42. transformer/checkpoint-681/optimizer.pt +2 -2
  43. transformer/checkpoint-681/rng_state.pth +1 -1
  44. transformer/checkpoint-681/scaler.pt +1 -1
  45. transformer/checkpoint-681/tokenizer.json +0 -0
  46. transformer/checkpoint-681/tokenizer_config.json +9 -9
  47. transformer/checkpoint-681/trainer_state.json +86 -86
  48. transformer/checkpoint-908/config.json +21 -19
  49. transformer/checkpoint-908/model.safetensors +2 -2
  50. transformer/checkpoint-908/optimizer.pt +2 -2
.gitattributes CHANGED
@@ -33,3 +33,9 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ transformer/checkpoint-1135/tokenizer.json filter=lfs diff=lfs merge=lfs -text
37
+ transformer/checkpoint-227/tokenizer.json filter=lfs diff=lfs merge=lfs -text
38
+ transformer/checkpoint-454/tokenizer.json filter=lfs diff=lfs merge=lfs -text
39
+ transformer/checkpoint-681/tokenizer.json filter=lfs diff=lfs merge=lfs -text
40
+ transformer/checkpoint-908/tokenizer.json filter=lfs diff=lfs merge=lfs -text
41
+ transformer/tokenizer.json filter=lfs diff=lfs merge=lfs -text
README.md CHANGED
@@ -1,7 +1,7 @@
1
  ---
2
  library_name: transformers
3
  pipeline_tag: text-classification
4
- base_model: distilbert/distilbert-base-multilingual-cased
5
  tags:
6
  - text-classification
7
  - binary-classification
@@ -17,10 +17,11 @@ It includes the Transformer model, any configured TF-IDF or sentence-embedding b
17
 
18
  - Dataset: `faodl/amis-agri-wheat`
19
  - Dataset subset: ``
 
20
  - Text column: `chunk_text`
21
  - Label column: `label`
22
- - Transformer: `distilbert/distilbert-base-multilingual-cased`
23
- - Generated at: `2026-05-19T20:24:29.088047+00:00`
24
 
25
  ## Dataset Summary
26
 
@@ -30,6 +31,25 @@ It includes the Transformer model, any configured TF-IDF or sentence-embedding b
30
  | validation | 759 | 486 | 273 | 396 | 636.7 |
31
  | test | 762 | 470 | 292 | 397 | 643.3 |
32
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
33
  ## Threshold Comparison on Test Split
34
 
35
  | Model | Threshold | Accuracy | Precision | Recall | F1 | ROC AUC | Average precision |
@@ -38,14 +58,14 @@ It includes the Transformer model, any configured TF-IDF or sentence-embedding b
38
  | logistic_tfidf | 0.470 | 0.797 | 0.688 | 0.860 | 0.764 | 0.888 | 0.827 |
39
  | xgboost_tfidf | 0.500 | 0.835 | 0.773 | 0.805 | 0.789 | 0.910 | 0.831 |
40
  | xgboost_tfidf | 0.520 | 0.835 | 0.777 | 0.798 | 0.787 | 0.910 | 0.831 |
41
- | embedding-logistic_sentence_embeddings | 0.500 | 0.795 | 0.711 | 0.784 | 0.746 | 0.880 | 0.825 |
42
- | embedding-logistic_sentence_embeddings | 0.564 | 0.799 | 0.746 | 0.723 | 0.734 | 0.880 | 0.825 |
43
- | embedding-svm_sentence_embeddings | 0.500 | 0.811 | 0.794 | 0.685 | 0.735 | 0.884 | 0.828 |
44
- | embedding-svm_sentence_embeddings | 0.406 | 0.801 | 0.716 | 0.795 | 0.753 | 0.884 | 0.828 |
45
- | embedding-lightgbm_sentence_embeddings | 0.500 | 0.807 | 0.756 | 0.733 | 0.744 | 0.888 | 0.823 |
46
- | embedding-lightgbm_sentence_embeddings | 0.540 | 0.806 | 0.761 | 0.719 | 0.739 | 0.888 | 0.823 |
47
- | transformer | 0.500 | 0.881 | 0.823 | 0.877 | 0.849 | 0.944 | 0.905 |
48
- | transformer | 0.581 | 0.881 | 0.830 | 0.866 | 0.848 | 0.944 | 0.905 |
49
 
50
  ## Confusion Matrices on Test Split
51
 
@@ -83,67 +103,67 @@ Rows are true labels and columns are predicted labels.
83
 
84
  | True / Predicted | NOT_RELEVANT | RELEVANT |
85
  | --- | ---: | ---: |
86
- | NOT_RELEVANT | 377 | 93 |
87
- | RELEVANT | 63 | 229 |
88
 
89
- ### embedding-logistic_sentence_embeddings at threshold 0.564
90
 
91
  | True / Predicted | NOT_RELEVANT | RELEVANT |
92
  | --- | ---: | ---: |
93
- | NOT_RELEVANT | 398 | 72 |
94
- | RELEVANT | 81 | 211 |
95
 
96
  ### embedding-svm_sentence_embeddings at threshold 0.500
97
 
98
  | True / Predicted | NOT_RELEVANT | RELEVANT |
99
  | --- | ---: | ---: |
100
- | NOT_RELEVANT | 418 | 52 |
101
- | RELEVANT | 92 | 200 |
102
 
103
- ### embedding-svm_sentence_embeddings at threshold 0.406
104
 
105
  | True / Predicted | NOT_RELEVANT | RELEVANT |
106
  | --- | ---: | ---: |
107
- | NOT_RELEVANT | 378 | 92 |
108
- | RELEVANT | 60 | 232 |
109
 
110
  ### embedding-lightgbm_sentence_embeddings at threshold 0.500
111
 
112
  | True / Predicted | NOT_RELEVANT | RELEVANT |
113
  | --- | ---: | ---: |
114
- | NOT_RELEVANT | 401 | 69 |
115
- | RELEVANT | 78 | 214 |
116
 
117
- ### embedding-lightgbm_sentence_embeddings at threshold 0.540
118
 
119
  | True / Predicted | NOT_RELEVANT | RELEVANT |
120
  | --- | ---: | ---: |
121
- | NOT_RELEVANT | 404 | 66 |
122
- | RELEVANT | 82 | 210 |
123
 
124
  ### transformer at threshold 0.500
125
 
126
  | True / Predicted | NOT_RELEVANT | RELEVANT |
127
  | --- | ---: | ---: |
128
- | NOT_RELEVANT | 415 | 55 |
129
- | RELEVANT | 36 | 256 |
130
 
131
- ### transformer at threshold 0.581
132
 
133
  | True / Predicted | NOT_RELEVANT | RELEVANT |
134
  | --- | ---: | ---: |
135
- | NOT_RELEVANT | 418 | 52 |
136
- | RELEVANT | 39 | 253 |
137
 
138
 
139
  ## Validation-Tuned Thresholds
140
 
141
  - `logistic_tfidf`: threshold `0.470` (validation F1 `0.779`); test F1 change vs 0.5: `+0.005`.
142
  - `xgboost_tfidf`: threshold `0.520` (validation F1 `0.822`); test F1 change vs 0.5: `-0.001`.
143
- - `embedding-logistic_sentence_embeddings`: threshold `0.564` (validation F1 `0.730`); test F1 change vs 0.5: `-0.012`.
144
- - `embedding-svm_sentence_embeddings`: threshold `0.406` (validation F1 `0.734`); test F1 change vs 0.5: `+0.018`.
145
- - `embedding-lightgbm_sentence_embeddings`: threshold `0.540` (validation F1 `0.748`); test F1 change vs 0.5: `-0.005`.
146
- - `transformer`: threshold `0.581` (validation F1 `0.878`); test F1 change vs 0.5: `-0.002`.
147
 
148
  ## Artifacts
149
 
@@ -159,7 +179,7 @@ Rows are true labels and columns are predicted labels.
159
  Install the runtime dependencies:
160
 
161
  ```bash
162
- pip install transformers torch huggingface_hub pandas joblib scikit-learn xgboost sentence-transformers lightgbm
163
  ```
164
 
165
  ### Transformer
@@ -168,7 +188,7 @@ pip install transformers torch huggingface_hub pandas joblib scikit-learn xgboos
168
  import torch
169
  from transformers import AutoModelForSequenceClassification, AutoTokenizer
170
 
171
- MODEL_ID = "faodl/agri-wheat-classifier"
172
 
173
  texts = [
174
  "Rice export prices increased after new procurement rules were announced.",
@@ -205,7 +225,7 @@ import json
205
  import joblib
206
  from huggingface_hub import hf_hub_download
207
 
208
- MODEL_ID = "faodl/agri-wheat-classifier"
209
  BASELINE = "logistic"
210
 
211
  texts = [
@@ -246,10 +266,11 @@ Available embedding baseline names in this run: "embedding-logistic", "embedding
246
 
247
  ```python
248
  import joblib
 
249
  from huggingface_hub import hf_hub_download
250
- from sentence_transformers import SentenceTransformer
251
 
252
- MODEL_ID = "faodl/agri-wheat-classifier"
253
  BASELINE = "embedding-logistic"
254
 
255
  texts = [
@@ -263,13 +284,31 @@ model_path = hf_hub_download(
263
  filename=f"baselines/{BASELINE}/{BASELINE}.joblib",
264
  )
265
  artifact = joblib.load(model_path)
266
- embedding_model = SentenceTransformer(artifact["embedding_model_name"])
267
- embeddings = embedding_model.encode(
268
- texts,
269
- batch_size=artifact.get("embedding_batch_size", 64),
270
- convert_to_numpy=True,
271
- normalize_embeddings=artifact.get("normalize_embeddings", True),
272
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
273
  probabilities = artifact["classifier"].predict_proba(embeddings)[:, 1]
274
  threshold = artifact["validation_best_threshold"]["threshold"]
275
 
 
1
  ---
2
  library_name: transformers
3
  pipeline_tag: text-classification
4
+ base_model: FacebookAI/xlm-roberta-base
5
  tags:
6
  - text-classification
7
  - binary-classification
 
17
 
18
  - Dataset: `faodl/amis-agri-wheat`
19
  - Dataset subset: ``
20
+ - Dataset revision: `main`
21
  - Text column: `chunk_text`
22
  - Label column: `label`
23
+ - Transformer: `FacebookAI/xlm-roberta-base`
24
+ - Generated at: `2026-05-29T18:13:08.384805+00:00`
25
 
26
  ## Dataset Summary
27
 
 
31
  | validation | 759 | 486 | 273 | 396 | 636.7 |
32
  | test | 762 | 470 | 292 | 397 | 643.3 |
33
 
34
+ ## Threshold Comparison on Validation Split
35
+
36
+ Validation metrics document threshold selection and tuning behavior; test metrics remain the primary estimate of out-of-sample performance.
37
+
38
+ | Model | Threshold | Accuracy | Precision | Recall | F1 | ROC AUC | Average precision |
39
+ | --- | ---: | ---: | ---: | ---: | ---: | ---: | ---: |
40
+ | logistic_tfidf | 0.500 | 0.818 | 0.718 | 0.813 | 0.763 | 0.907 | 0.867 |
41
+ | logistic_tfidf | 0.470 | 0.823 | 0.709 | 0.864 | 0.779 | 0.907 | 0.867 |
42
+ | xgboost_tfidf | 0.500 | 0.868 | 0.808 | 0.832 | 0.819 | 0.935 | 0.892 |
43
+ | xgboost_tfidf | 0.520 | 0.871 | 0.816 | 0.828 | 0.822 | 0.935 | 0.892 |
44
+ | embedding-logistic_sentence_embeddings | 0.500 | 0.783 | 0.658 | 0.824 | 0.732 | 0.862 | 0.780 |
45
+ | embedding-logistic_sentence_embeddings | 0.521 | 0.791 | 0.673 | 0.813 | 0.736 | 0.862 | 0.780 |
46
+ | embedding-svm_sentence_embeddings | 0.500 | 0.804 | 0.714 | 0.758 | 0.735 | 0.869 | 0.792 |
47
+ | embedding-svm_sentence_embeddings | 0.473 | 0.805 | 0.704 | 0.791 | 0.745 | 0.869 | 0.792 |
48
+ | embedding-lightgbm_sentence_embeddings | 0.500 | 0.791 | 0.694 | 0.747 | 0.720 | 0.868 | 0.786 |
49
+ | embedding-lightgbm_sentence_embeddings | 0.433 | 0.800 | 0.693 | 0.795 | 0.741 | 0.868 | 0.786 |
50
+ | transformer | 0.500 | 0.925 | 0.894 | 0.897 | 0.896 | 0.956 | 0.914 |
51
+ | transformer | 0.203 | 0.926 | 0.883 | 0.916 | 0.899 | 0.956 | 0.914 |
52
+
53
  ## Threshold Comparison on Test Split
54
 
55
  | Model | Threshold | Accuracy | Precision | Recall | F1 | ROC AUC | Average precision |
 
58
  | logistic_tfidf | 0.470 | 0.797 | 0.688 | 0.860 | 0.764 | 0.888 | 0.827 |
59
  | xgboost_tfidf | 0.500 | 0.835 | 0.773 | 0.805 | 0.789 | 0.910 | 0.831 |
60
  | xgboost_tfidf | 0.520 | 0.835 | 0.777 | 0.798 | 0.787 | 0.910 | 0.831 |
61
+ | embedding-logistic_sentence_embeddings | 0.500 | 0.782 | 0.699 | 0.757 | 0.727 | 0.877 | 0.821 |
62
+ | embedding-logistic_sentence_embeddings | 0.521 | 0.789 | 0.713 | 0.750 | 0.731 | 0.877 | 0.821 |
63
+ | embedding-svm_sentence_embeddings | 0.500 | 0.818 | 0.778 | 0.733 | 0.755 | 0.883 | 0.824 |
64
+ | embedding-svm_sentence_embeddings | 0.473 | 0.812 | 0.758 | 0.750 | 0.754 | 0.883 | 0.824 |
65
+ | embedding-lightgbm_sentence_embeddings | 0.500 | 0.798 | 0.740 | 0.729 | 0.734 | 0.892 | 0.847 |
66
+ | embedding-lightgbm_sentence_embeddings | 0.433 | 0.806 | 0.735 | 0.771 | 0.753 | 0.892 | 0.847 |
67
+ | transformer | 0.500 | 0.885 | 0.862 | 0.832 | 0.847 | 0.943 | 0.915 |
68
+ | transformer | 0.203 | 0.890 | 0.854 | 0.860 | 0.857 | 0.943 | 0.915 |
69
 
70
  ## Confusion Matrices on Test Split
71
 
 
103
 
104
  | True / Predicted | NOT_RELEVANT | RELEVANT |
105
  | --- | ---: | ---: |
106
+ | NOT_RELEVANT | 375 | 95 |
107
+ | RELEVANT | 71 | 221 |
108
 
109
+ ### embedding-logistic_sentence_embeddings at threshold 0.521
110
 
111
  | True / Predicted | NOT_RELEVANT | RELEVANT |
112
  | --- | ---: | ---: |
113
+ | NOT_RELEVANT | 382 | 88 |
114
+ | RELEVANT | 73 | 219 |
115
 
116
  ### embedding-svm_sentence_embeddings at threshold 0.500
117
 
118
  | True / Predicted | NOT_RELEVANT | RELEVANT |
119
  | --- | ---: | ---: |
120
+ | NOT_RELEVANT | 409 | 61 |
121
+ | RELEVANT | 78 | 214 |
122
 
123
+ ### embedding-svm_sentence_embeddings at threshold 0.473
124
 
125
  | True / Predicted | NOT_RELEVANT | RELEVANT |
126
  | --- | ---: | ---: |
127
+ | NOT_RELEVANT | 400 | 70 |
128
+ | RELEVANT | 73 | 219 |
129
 
130
  ### embedding-lightgbm_sentence_embeddings at threshold 0.500
131
 
132
  | True / Predicted | NOT_RELEVANT | RELEVANT |
133
  | --- | ---: | ---: |
134
+ | NOT_RELEVANT | 395 | 75 |
135
+ | RELEVANT | 79 | 213 |
136
 
137
+ ### embedding-lightgbm_sentence_embeddings at threshold 0.433
138
 
139
  | True / Predicted | NOT_RELEVANT | RELEVANT |
140
  | --- | ---: | ---: |
141
+ | NOT_RELEVANT | 389 | 81 |
142
+ | RELEVANT | 67 | 225 |
143
 
144
  ### transformer at threshold 0.500
145
 
146
  | True / Predicted | NOT_RELEVANT | RELEVANT |
147
  | --- | ---: | ---: |
148
+ | NOT_RELEVANT | 431 | 39 |
149
+ | RELEVANT | 49 | 243 |
150
 
151
+ ### transformer at threshold 0.203
152
 
153
  | True / Predicted | NOT_RELEVANT | RELEVANT |
154
  | --- | ---: | ---: |
155
+ | NOT_RELEVANT | 427 | 43 |
156
+ | RELEVANT | 41 | 251 |
157
 
158
 
159
  ## Validation-Tuned Thresholds
160
 
161
  - `logistic_tfidf`: threshold `0.470` (validation F1 `0.779`); test F1 change vs 0.5: `+0.005`.
162
  - `xgboost_tfidf`: threshold `0.520` (validation F1 `0.822`); test F1 change vs 0.5: `-0.001`.
163
+ - `embedding-logistic_sentence_embeddings`: threshold `0.521` (validation F1 `0.736`); test F1 change vs 0.5: `+0.004`.
164
+ - `embedding-svm_sentence_embeddings`: threshold `0.473` (validation F1 `0.745`); test F1 change vs 0.5: `-0.001`.
165
+ - `embedding-lightgbm_sentence_embeddings`: threshold `0.433` (validation F1 `0.741`); test F1 change vs 0.5: `+0.018`.
166
+ - `transformer`: threshold `0.203` (validation F1 `0.899`); test F1 change vs 0.5: `+0.010`.
167
 
168
  ## Artifacts
169
 
 
179
  Install the runtime dependencies:
180
 
181
  ```bash
182
+ pip install transformers torch huggingface_hub pandas joblib scikit-learn xgboost lightgbm
183
  ```
184
 
185
  ### Transformer
 
188
  import torch
189
  from transformers import AutoModelForSequenceClassification, AutoTokenizer
190
 
191
+ MODEL_ID = "YOUR_USERNAME/YOUR_MODEL_REPO"
192
 
193
  texts = [
194
  "Rice export prices increased after new procurement rules were announced.",
 
225
  import joblib
226
  from huggingface_hub import hf_hub_download
227
 
228
+ MODEL_ID = "YOUR_USERNAME/YOUR_MODEL_REPO"
229
  BASELINE = "logistic"
230
 
231
  texts = [
 
266
 
267
  ```python
268
  import joblib
269
+ import torch
270
  from huggingface_hub import hf_hub_download
271
+ from transformers import AutoModel, AutoTokenizer
272
 
273
+ MODEL_ID = "YOUR_USERNAME/YOUR_MODEL_REPO"
274
  BASELINE = "embedding-logistic"
275
 
276
  texts = [
 
284
  filename=f"baselines/{BASELINE}/{BASELINE}.joblib",
285
  )
286
  artifact = joblib.load(model_path)
287
+ tokenizer = AutoTokenizer.from_pretrained(artifact["embedding_model_name"])
288
+ encoder = AutoModel.from_pretrained(artifact["embedding_model_name"])
289
+ encoder.eval()
290
+
291
+ encoded_batches = []
292
+ batch_size = artifact.get("embedding_batch_size", 64)
293
+ for start in range(0, len(texts), batch_size):
294
+ batch_texts = texts[start : start + batch_size]
295
+ inputs = tokenizer(
296
+ batch_texts,
297
+ padding=True,
298
+ truncation=True,
299
+ max_length=artifact.get("embedding_max_length", 256),
300
+ return_tensors="pt",
301
+ )
302
+ with torch.no_grad():
303
+ outputs = encoder(**inputs)
304
+ token_embeddings = outputs.last_hidden_state
305
+ attention_mask = inputs["attention_mask"].unsqueeze(-1).to(token_embeddings.dtype)
306
+ embeddings = (token_embeddings * attention_mask).sum(dim=1)
307
+ embeddings = embeddings / attention_mask.sum(dim=1).clamp(min=1e-9)
308
+ if artifact.get("normalize_embeddings", True):
309
+ embeddings = torch.nn.functional.normalize(embeddings, p=2, dim=1)
310
+ encoded_batches.append(embeddings)
311
+ embeddings = torch.cat(encoded_batches).numpy()
312
  probabilities = artifact["classifier"].predict_proba(embeddings)[:, 1]
313
  threshold = artifact["validation_best_threshold"]["threshold"]
314
 
REPORT.md CHANGED
@@ -2,10 +2,11 @@
2
 
3
  - Dataset: `faodl/amis-agri-wheat`
4
  - Dataset subset: ``
 
5
  - Text column: `chunk_text`
6
  - Label column: `label`
7
- - Transformer: `distilbert/distilbert-base-multilingual-cased`
8
- - Generated at: `2026-05-19T20:24:29.088047+00:00`
9
 
10
  ## Dataset Summary
11
 
@@ -15,6 +16,25 @@
15
  | validation | 759 | 486 | 273 | 396 | 636.7 |
16
  | test | 762 | 470 | 292 | 397 | 643.3 |
17
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
18
  ## Threshold Comparison on Test Split
19
 
20
  | Model | Threshold | Accuracy | Precision | Recall | F1 | ROC AUC | Average precision |
@@ -23,14 +43,14 @@
23
  | logistic_tfidf | 0.470 | 0.797 | 0.688 | 0.860 | 0.764 | 0.888 | 0.827 |
24
  | xgboost_tfidf | 0.500 | 0.835 | 0.773 | 0.805 | 0.789 | 0.910 | 0.831 |
25
  | xgboost_tfidf | 0.520 | 0.835 | 0.777 | 0.798 | 0.787 | 0.910 | 0.831 |
26
- | embedding-logistic_sentence_embeddings | 0.500 | 0.795 | 0.711 | 0.784 | 0.746 | 0.880 | 0.825 |
27
- | embedding-logistic_sentence_embeddings | 0.564 | 0.799 | 0.746 | 0.723 | 0.734 | 0.880 | 0.825 |
28
- | embedding-svm_sentence_embeddings | 0.500 | 0.811 | 0.794 | 0.685 | 0.735 | 0.884 | 0.828 |
29
- | embedding-svm_sentence_embeddings | 0.406 | 0.801 | 0.716 | 0.795 | 0.753 | 0.884 | 0.828 |
30
- | embedding-lightgbm_sentence_embeddings | 0.500 | 0.807 | 0.756 | 0.733 | 0.744 | 0.888 | 0.823 |
31
- | embedding-lightgbm_sentence_embeddings | 0.540 | 0.806 | 0.761 | 0.719 | 0.739 | 0.888 | 0.823 |
32
- | transformer | 0.500 | 0.881 | 0.823 | 0.877 | 0.849 | 0.944 | 0.905 |
33
- | transformer | 0.581 | 0.881 | 0.830 | 0.866 | 0.848 | 0.944 | 0.905 |
34
 
35
  ## Confusion Matrices on Test Split
36
 
@@ -68,67 +88,67 @@ Rows are true labels and columns are predicted labels.
68
 
69
  | True / Predicted | NOT_RELEVANT | RELEVANT |
70
  | --- | ---: | ---: |
71
- | NOT_RELEVANT | 377 | 93 |
72
- | RELEVANT | 63 | 229 |
73
 
74
- ### embedding-logistic_sentence_embeddings at threshold 0.564
75
 
76
  | True / Predicted | NOT_RELEVANT | RELEVANT |
77
  | --- | ---: | ---: |
78
- | NOT_RELEVANT | 398 | 72 |
79
- | RELEVANT | 81 | 211 |
80
 
81
  ### embedding-svm_sentence_embeddings at threshold 0.500
82
 
83
  | True / Predicted | NOT_RELEVANT | RELEVANT |
84
  | --- | ---: | ---: |
85
- | NOT_RELEVANT | 418 | 52 |
86
- | RELEVANT | 92 | 200 |
87
 
88
- ### embedding-svm_sentence_embeddings at threshold 0.406
89
 
90
  | True / Predicted | NOT_RELEVANT | RELEVANT |
91
  | --- | ---: | ---: |
92
- | NOT_RELEVANT | 378 | 92 |
93
- | RELEVANT | 60 | 232 |
94
 
95
  ### embedding-lightgbm_sentence_embeddings at threshold 0.500
96
 
97
  | True / Predicted | NOT_RELEVANT | RELEVANT |
98
  | --- | ---: | ---: |
99
- | NOT_RELEVANT | 401 | 69 |
100
- | RELEVANT | 78 | 214 |
101
 
102
- ### embedding-lightgbm_sentence_embeddings at threshold 0.540
103
 
104
  | True / Predicted | NOT_RELEVANT | RELEVANT |
105
  | --- | ---: | ---: |
106
- | NOT_RELEVANT | 404 | 66 |
107
- | RELEVANT | 82 | 210 |
108
 
109
  ### transformer at threshold 0.500
110
 
111
  | True / Predicted | NOT_RELEVANT | RELEVANT |
112
  | --- | ---: | ---: |
113
- | NOT_RELEVANT | 415 | 55 |
114
- | RELEVANT | 36 | 256 |
115
 
116
- ### transformer at threshold 0.581
117
 
118
  | True / Predicted | NOT_RELEVANT | RELEVANT |
119
  | --- | ---: | ---: |
120
- | NOT_RELEVANT | 418 | 52 |
121
- | RELEVANT | 39 | 253 |
122
 
123
 
124
  ## Validation-Tuned Thresholds
125
 
126
  - `logistic_tfidf`: threshold `0.470` (validation F1 `0.779`); test F1 change vs 0.5: `+0.005`.
127
  - `xgboost_tfidf`: threshold `0.520` (validation F1 `0.822`); test F1 change vs 0.5: `-0.001`.
128
- - `embedding-logistic_sentence_embeddings`: threshold `0.564` (validation F1 `0.730`); test F1 change vs 0.5: `-0.012`.
129
- - `embedding-svm_sentence_embeddings`: threshold `0.406` (validation F1 `0.734`); test F1 change vs 0.5: `+0.018`.
130
- - `embedding-lightgbm_sentence_embeddings`: threshold `0.540` (validation F1 `0.748`); test F1 change vs 0.5: `-0.005`.
131
- - `transformer`: threshold `0.581` (validation F1 `0.878`); test F1 change vs 0.5: `-0.002`.
132
 
133
  ## Artifacts
134
 
 
2
 
3
  - Dataset: `faodl/amis-agri-wheat`
4
  - Dataset subset: ``
5
+ - Dataset revision: `main`
6
  - Text column: `chunk_text`
7
  - Label column: `label`
8
+ - Transformer: `FacebookAI/xlm-roberta-base`
9
+ - Generated at: `2026-05-29T18:13:08.384805+00:00`
10
 
11
  ## Dataset Summary
12
 
 
16
  | validation | 759 | 486 | 273 | 396 | 636.7 |
17
  | test | 762 | 470 | 292 | 397 | 643.3 |
18
 
19
+ ## Threshold Comparison on Validation Split
20
+
21
+ Validation metrics document threshold selection and tuning behavior; test metrics remain the primary estimate of out-of-sample performance.
22
+
23
+ | Model | Threshold | Accuracy | Precision | Recall | F1 | ROC AUC | Average precision |
24
+ | --- | ---: | ---: | ---: | ---: | ---: | ---: | ---: |
25
+ | logistic_tfidf | 0.500 | 0.818 | 0.718 | 0.813 | 0.763 | 0.907 | 0.867 |
26
+ | logistic_tfidf | 0.470 | 0.823 | 0.709 | 0.864 | 0.779 | 0.907 | 0.867 |
27
+ | xgboost_tfidf | 0.500 | 0.868 | 0.808 | 0.832 | 0.819 | 0.935 | 0.892 |
28
+ | xgboost_tfidf | 0.520 | 0.871 | 0.816 | 0.828 | 0.822 | 0.935 | 0.892 |
29
+ | embedding-logistic_sentence_embeddings | 0.500 | 0.783 | 0.658 | 0.824 | 0.732 | 0.862 | 0.780 |
30
+ | embedding-logistic_sentence_embeddings | 0.521 | 0.791 | 0.673 | 0.813 | 0.736 | 0.862 | 0.780 |
31
+ | embedding-svm_sentence_embeddings | 0.500 | 0.804 | 0.714 | 0.758 | 0.735 | 0.869 | 0.792 |
32
+ | embedding-svm_sentence_embeddings | 0.473 | 0.805 | 0.704 | 0.791 | 0.745 | 0.869 | 0.792 |
33
+ | embedding-lightgbm_sentence_embeddings | 0.500 | 0.791 | 0.694 | 0.747 | 0.720 | 0.868 | 0.786 |
34
+ | embedding-lightgbm_sentence_embeddings | 0.433 | 0.800 | 0.693 | 0.795 | 0.741 | 0.868 | 0.786 |
35
+ | transformer | 0.500 | 0.925 | 0.894 | 0.897 | 0.896 | 0.956 | 0.914 |
36
+ | transformer | 0.203 | 0.926 | 0.883 | 0.916 | 0.899 | 0.956 | 0.914 |
37
+
38
  ## Threshold Comparison on Test Split
39
 
40
  | Model | Threshold | Accuracy | Precision | Recall | F1 | ROC AUC | Average precision |
 
43
  | logistic_tfidf | 0.470 | 0.797 | 0.688 | 0.860 | 0.764 | 0.888 | 0.827 |
44
  | xgboost_tfidf | 0.500 | 0.835 | 0.773 | 0.805 | 0.789 | 0.910 | 0.831 |
45
  | xgboost_tfidf | 0.520 | 0.835 | 0.777 | 0.798 | 0.787 | 0.910 | 0.831 |
46
+ | embedding-logistic_sentence_embeddings | 0.500 | 0.782 | 0.699 | 0.757 | 0.727 | 0.877 | 0.821 |
47
+ | embedding-logistic_sentence_embeddings | 0.521 | 0.789 | 0.713 | 0.750 | 0.731 | 0.877 | 0.821 |
48
+ | embedding-svm_sentence_embeddings | 0.500 | 0.818 | 0.778 | 0.733 | 0.755 | 0.883 | 0.824 |
49
+ | embedding-svm_sentence_embeddings | 0.473 | 0.812 | 0.758 | 0.750 | 0.754 | 0.883 | 0.824 |
50
+ | embedding-lightgbm_sentence_embeddings | 0.500 | 0.798 | 0.740 | 0.729 | 0.734 | 0.892 | 0.847 |
51
+ | embedding-lightgbm_sentence_embeddings | 0.433 | 0.806 | 0.735 | 0.771 | 0.753 | 0.892 | 0.847 |
52
+ | transformer | 0.500 | 0.885 | 0.862 | 0.832 | 0.847 | 0.943 | 0.915 |
53
+ | transformer | 0.203 | 0.890 | 0.854 | 0.860 | 0.857 | 0.943 | 0.915 |
54
 
55
  ## Confusion Matrices on Test Split
56
 
 
88
 
89
  | True / Predicted | NOT_RELEVANT | RELEVANT |
90
  | --- | ---: | ---: |
91
+ | NOT_RELEVANT | 375 | 95 |
92
+ | RELEVANT | 71 | 221 |
93
 
94
+ ### embedding-logistic_sentence_embeddings at threshold 0.521
95
 
96
  | True / Predicted | NOT_RELEVANT | RELEVANT |
97
  | --- | ---: | ---: |
98
+ | NOT_RELEVANT | 382 | 88 |
99
+ | RELEVANT | 73 | 219 |
100
 
101
  ### embedding-svm_sentence_embeddings at threshold 0.500
102
 
103
  | True / Predicted | NOT_RELEVANT | RELEVANT |
104
  | --- | ---: | ---: |
105
+ | NOT_RELEVANT | 409 | 61 |
106
+ | RELEVANT | 78 | 214 |
107
 
108
+ ### embedding-svm_sentence_embeddings at threshold 0.473
109
 
110
  | True / Predicted | NOT_RELEVANT | RELEVANT |
111
  | --- | ---: | ---: |
112
+ | NOT_RELEVANT | 400 | 70 |
113
+ | RELEVANT | 73 | 219 |
114
 
115
  ### embedding-lightgbm_sentence_embeddings at threshold 0.500
116
 
117
  | True / Predicted | NOT_RELEVANT | RELEVANT |
118
  | --- | ---: | ---: |
119
+ | NOT_RELEVANT | 395 | 75 |
120
+ | RELEVANT | 79 | 213 |
121
 
122
+ ### embedding-lightgbm_sentence_embeddings at threshold 0.433
123
 
124
  | True / Predicted | NOT_RELEVANT | RELEVANT |
125
  | --- | ---: | ---: |
126
+ | NOT_RELEVANT | 389 | 81 |
127
+ | RELEVANT | 67 | 225 |
128
 
129
  ### transformer at threshold 0.500
130
 
131
  | True / Predicted | NOT_RELEVANT | RELEVANT |
132
  | --- | ---: | ---: |
133
+ | NOT_RELEVANT | 431 | 39 |
134
+ | RELEVANT | 49 | 243 |
135
 
136
+ ### transformer at threshold 0.203
137
 
138
  | True / Predicted | NOT_RELEVANT | RELEVANT |
139
  | --- | ---: | ---: |
140
+ | NOT_RELEVANT | 427 | 43 |
141
+ | RELEVANT | 41 | 251 |
142
 
143
 
144
  ## Validation-Tuned Thresholds
145
 
146
  - `logistic_tfidf`: threshold `0.470` (validation F1 `0.779`); test F1 change vs 0.5: `+0.005`.
147
  - `xgboost_tfidf`: threshold `0.520` (validation F1 `0.822`); test F1 change vs 0.5: `-0.001`.
148
+ - `embedding-logistic_sentence_embeddings`: threshold `0.521` (validation F1 `0.736`); test F1 change vs 0.5: `+0.004`.
149
+ - `embedding-svm_sentence_embeddings`: threshold `0.473` (validation F1 `0.745`); test F1 change vs 0.5: `-0.001`.
150
+ - `embedding-lightgbm_sentence_embeddings`: threshold `0.433` (validation F1 `0.741`); test F1 change vs 0.5: `+0.018`.
151
+ - `transformer`: threshold `0.203` (validation F1 `0.899`); test F1 change vs 0.5: `+0.010`.
152
 
153
  ## Artifacts
154
 
baselines/embedding-lightgbm/embedding-lightgbm.joblib CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b3f56f59c1281405d56efca4d691933dd542cee67012f1c7345dd76701fba6d7
3
- size 1444644
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3463bed6fbc93713eabd1e10f2dbb63b05f5ce1533a4ea9aaab45a753bcb5c21
3
+ size 1444574
baselines/embedding-lightgbm/test_predictions.csv CHANGED
The diff for this file is too large to render. See raw diff
 
baselines/embedding-lightgbm/validation_predictions.csv CHANGED
The diff for this file is too large to render. See raw diff
 
baselines/embedding-logistic/embedding-logistic.joblib CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8675c256f2bd7cc388d6bc6a3c61e5930f5c33b4275649c9d831f344e8412fa4
3
- size 4287
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:645213b2f532b79aa9381ce2c83e1e1d47957dba20521077adcc302ac5c6a27b
3
+ size 4361
baselines/embedding-logistic/test_predictions.csv CHANGED
The diff for this file is too large to render. See raw diff
 
baselines/embedding-logistic/validation_predictions.csv CHANGED
The diff for this file is too large to render. See raw diff
 
baselines/embedding-svm/embedding-svm.joblib CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:61cbe1c99c38b69262a096054e3563b97479585fffafff10400538e65b20fdcb
3
- size 11696
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c464a859cf16feb638dfa769b9156542b9e1a35d82c9ab9035abf8ec110c44d6
3
+ size 11770
baselines/embedding-svm/test_predictions.csv CHANGED
The diff for this file is too large to render. See raw diff
 
baselines/embedding-svm/validation_predictions.csv CHANGED
The diff for this file is too large to render. See raw diff
 
report.json CHANGED
@@ -1,8 +1,9 @@
1
  {
2
- "created_at": "2026-05-19T20:24:29.088047+00:00",
3
  "config": {
4
  "hf_dataset": "faodl/amis-agri-wheat",
5
  "hf_subset": null,
 
6
  "train_split": "train",
7
  "validation_split": "validation",
8
  "test_split": "test",
@@ -10,7 +11,7 @@
10
  "label_col": "label",
11
  "group_col": "id",
12
  "id_col": "chunk_id",
13
- "model_name": "distilbert/distilbert-base-multilingual-cased",
14
  "output_dir": "/content/agri-wheat-classifier",
15
  "max_length": 256,
16
  "learning_rate": 2e-05,
@@ -37,8 +38,8 @@
37
  "embedding_batch_size": 64,
38
  "positive_label_name": "RELEVANT",
39
  "negative_label_name": "NOT_RELEVANT",
40
- "push_to_hub": true,
41
- "hub_model_id": "faodl/agri-wheat-classifier",
42
  "hub_private_repo": false
43
  },
44
  "dataset_summary": {
@@ -85,6 +86,98 @@
85
  "precision": 0.7087087087087087,
86
  "recall": 0.8644688644688645
87
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
88
  "test_default_0_5": {
89
  "threshold": 0.5,
90
  "accuracy": 0.8031496062992126,
@@ -189,6 +282,98 @@
189
  "precision": 0.8158844765342961,
190
  "recall": 0.8278388278388278
191
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
192
  "test_default_0_5": {
193
  "threshold": 0.5,
194
  "accuracy": 0.8346456692913385,
@@ -289,102 +474,194 @@
289
  "artifact_dir": "/content/agri-wheat-classifier/baselines/embedding-logistic",
290
  "artifact_file": "/content/agri-wheat-classifier/baselines/embedding-logistic/embedding-logistic.joblib",
291
  "validation_best_threshold": {
292
- "threshold": 0.563937513505343,
293
- "f1": 0.7301038062283736,
294
- "precision": 0.6918032786885245,
295
- "recall": 0.7728937728937729
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
296
  },
297
  "test_default_0_5": {
298
  "threshold": 0.5,
299
- "accuracy": 0.7952755905511811,
300
- "precision": 0.7111801242236024,
301
- "recall": 0.7842465753424658,
302
- "f1": 0.745928338762215,
303
  "confusion_matrix": [
304
  [
305
- 377,
306
- 93
307
  ],
308
  [
309
- 63,
310
- 229
311
  ]
312
  ],
313
  "classification_report": {
314
  "NOT_RELEVANT": {
315
- "precision": 0.8568181818181818,
316
- "recall": 0.8021276595744681,
317
- "f1-score": 0.8285714285714286,
318
  "support": 470.0
319
  },
320
  "RELEVANT": {
321
- "precision": 0.7111801242236024,
322
- "recall": 0.7842465753424658,
323
- "f1-score": 0.745928338762215,
324
  "support": 292.0
325
  },
326
- "accuracy": 0.7952755905511811,
327
  "macro avg": {
328
- "precision": 0.7839991530208921,
329
- "recall": 0.7931871174584669,
330
- "f1-score": 0.7872498836668218,
331
  "support": 762.0
332
  },
333
  "weighted avg": {
334
- "precision": 0.8010093723462433,
335
- "recall": 0.7952755905511811,
336
- "f1-score": 0.7969024230277404,
337
  "support": 762.0
338
  }
339
  },
340
- "roc_auc": 0.8801005537744098,
341
- "average_precision": 0.8246628238347572
342
  },
343
  "test_optimal_threshold": {
344
- "threshold": 0.563937513505343,
345
- "accuracy": 0.7992125984251969,
346
- "precision": 0.7455830388692579,
347
- "recall": 0.7226027397260274,
348
- "f1": 0.7339130434782609,
349
  "confusion_matrix": [
350
  [
351
- 398,
352
- 72
353
  ],
354
  [
355
- 81,
356
- 211
357
  ]
358
  ],
359
  "classification_report": {
360
  "NOT_RELEVANT": {
361
- "precision": 0.8308977035490606,
362
- "recall": 0.8468085106382979,
363
- "f1-score": 0.8387776606954689,
364
  "support": 470.0
365
  },
366
  "RELEVANT": {
367
- "precision": 0.7455830388692579,
368
- "recall": 0.7226027397260274,
369
- "f1-score": 0.7339130434782609,
370
  "support": 292.0
371
  },
372
- "accuracy": 0.7992125984251969,
373
  "macro avg": {
374
- "precision": 0.7882403712091592,
375
- "recall": 0.7847056251821627,
376
- "f1-score": 0.7863453520868648,
377
  "support": 762.0
378
  },
379
  "weighted avg": {
380
- "precision": 0.7982049449053567,
381
- "recall": 0.7992125984251969,
382
- "f1-score": 0.7985933191896621,
383
  "support": 762.0
384
  }
385
  },
386
- "roc_auc": 0.8801005537744098,
387
- "average_precision": 0.8246628238347572
388
  }
389
  },
390
  {
@@ -394,102 +671,194 @@
394
  "artifact_dir": "/content/agri-wheat-classifier/baselines/embedding-svm",
395
  "artifact_file": "/content/agri-wheat-classifier/baselines/embedding-svm/embedding-svm.joblib",
396
  "validation_best_threshold": {
397
- "threshold": 0.40634467461913637,
398
- "f1": 0.7335526315789473,
399
- "precision": 0.6656716417910448,
400
- "recall": 0.8168498168498168
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
401
  },
402
  "test_default_0_5": {
403
  "threshold": 0.5,
404
- "accuracy": 0.8110236220472441,
405
- "precision": 0.7936507936507936,
406
- "recall": 0.684931506849315,
407
- "f1": 0.7352941176470589,
408
  "confusion_matrix": [
409
  [
410
- 418,
411
- 52
412
  ],
413
  [
414
- 92,
415
- 200
416
  ]
417
  ],
418
  "classification_report": {
419
  "NOT_RELEVANT": {
420
- "precision": 0.8196078431372549,
421
- "recall": 0.8893617021276595,
422
- "f1-score": 0.8530612244897959,
423
  "support": 470.0
424
  },
425
  "RELEVANT": {
426
- "precision": 0.7936507936507936,
427
- "recall": 0.684931506849315,
428
- "f1-score": 0.7352941176470589,
429
  "support": 292.0
430
  },
431
- "accuracy": 0.8110236220472441,
432
  "macro avg": {
433
- "precision": 0.8066293183940243,
434
- "recall": 0.7871466044884873,
435
- "f1-score": 0.7941776710684274,
436
  "support": 762.0
437
  },
438
  "weighted avg": {
439
- "precision": 0.8096610472710519,
440
- "recall": 0.8110236220472441,
441
- "f1-score": 0.8079326218676448,
442
  "support": 762.0
443
  }
444
  },
445
- "roc_auc": 0.8844068784610901,
446
- "average_precision": 0.8275847340102567
447
  },
448
  "test_optimal_threshold": {
449
- "threshold": 0.40634467461913637,
450
- "accuracy": 0.800524934383202,
451
- "precision": 0.7160493827160493,
452
- "recall": 0.7945205479452054,
453
- "f1": 0.7532467532467533,
454
  "confusion_matrix": [
455
  [
456
- 378,
457
- 92
458
  ],
459
  [
460
- 60,
461
- 232
462
  ]
463
  ],
464
  "classification_report": {
465
  "NOT_RELEVANT": {
466
- "precision": 0.863013698630137,
467
- "recall": 0.8042553191489362,
468
- "f1-score": 0.8325991189427313,
469
  "support": 470.0
470
  },
471
  "RELEVANT": {
472
- "precision": 0.7160493827160493,
473
- "recall": 0.7945205479452054,
474
- "f1-score": 0.7532467532467533,
475
  "support": 292.0
476
  },
477
- "accuracy": 0.800524934383202,
478
  "macro avg": {
479
- "precision": 0.7895315406730932,
480
- "recall": 0.7993879335470708,
481
- "f1-score": 0.7929229360947423,
482
  "support": 762.0
483
  },
484
  "weighted avg": {
485
- "precision": 0.8066966641853685,
486
- "recall": 0.800524934383202,
487
- "f1-score": 0.802191125788892,
488
  "support": 762.0
489
  }
490
  },
491
- "roc_auc": 0.8844068784610901,
492
- "average_precision": 0.8275847340102567
493
  }
494
  },
495
  {
@@ -499,205 +868,389 @@
499
  "artifact_dir": "/content/agri-wheat-classifier/baselines/embedding-lightgbm",
500
  "artifact_file": "/content/agri-wheat-classifier/baselines/embedding-lightgbm/embedding-lightgbm.joblib",
501
  "validation_best_threshold": {
502
- "threshold": 0.5404668230512064,
503
- "f1": 0.7482014388489209,
504
- "precision": 0.734982332155477,
505
- "recall": 0.7619047619047619
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
506
  },
507
  "test_default_0_5": {
508
  "threshold": 0.5,
509
- "accuracy": 0.8070866141732284,
510
- "precision": 0.7561837455830389,
511
- "recall": 0.7328767123287672,
512
- "f1": 0.7443478260869565,
513
  "confusion_matrix": [
514
  [
515
- 401,
516
- 69
517
  ],
518
  [
519
- 78,
520
- 214
521
  ]
522
  ],
523
  "classification_report": {
524
  "NOT_RELEVANT": {
525
- "precision": 0.837160751565762,
526
- "recall": 0.8531914893617021,
527
- "f1-score": 0.845100105374078,
528
  "support": 470.0
529
  },
530
  "RELEVANT": {
531
- "precision": 0.7561837455830389,
532
- "recall": 0.7328767123287672,
533
- "f1-score": 0.7443478260869565,
534
  "support": 292.0
535
  },
536
- "accuracy": 0.8070866141732284,
537
  "macro avg": {
538
- "precision": 0.7966722485744004,
539
- "recall": 0.7930341008452346,
540
- "f1-score": 0.7947239657305172,
541
  "support": 762.0
542
  },
543
  "weighted avg": {
544
- "precision": 0.8061301928427238,
545
- "recall": 0.8070866141732284,
546
- "f1-score": 0.8064916203979107,
547
  "support": 762.0
548
  }
549
  },
550
- "roc_auc": 0.8878825415330808,
551
- "average_precision": 0.8230959168683762
552
  },
553
  "test_optimal_threshold": {
554
- "threshold": 0.5404668230512064,
555
  "accuracy": 0.8057742782152231,
556
- "precision": 0.7608695652173914,
557
- "recall": 0.7191780821917808,
558
- "f1": 0.7394366197183099,
559
  "confusion_matrix": [
560
  [
561
- 404,
562
- 66
563
  ],
564
  [
565
- 82,
566
- 210
567
  ]
568
  ],
569
  "classification_report": {
570
  "NOT_RELEVANT": {
571
- "precision": 0.831275720164609,
572
- "recall": 0.8595744680851064,
573
- "f1-score": 0.8451882845188284,
574
  "support": 470.0
575
  },
576
  "RELEVANT": {
577
- "precision": 0.7608695652173914,
578
- "recall": 0.7191780821917808,
579
- "f1-score": 0.7394366197183099,
580
  "support": 292.0
581
  },
582
  "accuracy": 0.8057742782152231,
583
  "macro avg": {
584
- "precision": 0.7960726426910002,
585
- "recall": 0.7893762751384437,
586
- "f1-score": 0.7923124521185692,
587
  "support": 762.0
588
  },
589
  "weighted avg": {
590
- "precision": 0.8042959337543892,
591
  "recall": 0.8057742782152231,
592
- "f1-score": 0.80466402451653,
593
  "support": 762.0
594
  }
595
  },
596
- "roc_auc": 0.8878825415330808,
597
- "average_precision": 0.8230959168683762
598
  }
599
  },
600
  {
601
  "model_type": "transformer",
602
- "model_name": "distilbert/distilbert-base-multilingual-cased",
603
  "artifact_dir": "/content/agri-wheat-classifier/transformer",
604
  "validation_best_threshold": {
605
- "threshold": 0.5806344747543335,
606
- "f1": 0.8776978417266188,
607
- "precision": 0.8621908127208481,
608
- "recall": 0.8937728937728938
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
609
  },
610
  "test_default_0_5": {
611
  "threshold": 0.5,
612
- "accuracy": 0.8805774278215223,
613
- "precision": 0.8231511254019293,
614
- "recall": 0.8767123287671232,
615
- "f1": 0.8490878938640133,
616
  "confusion_matrix": [
617
  [
618
- 415,
619
- 55
620
  ],
621
  [
622
- 36,
623
- 256
624
  ]
625
  ],
626
  "classification_report": {
627
  "NOT_RELEVANT": {
628
- "precision": 0.9201773835920177,
629
- "recall": 0.8829787234042553,
630
- "f1-score": 0.9011943539630836,
631
  "support": 470.0
632
  },
633
  "RELEVANT": {
634
- "precision": 0.8231511254019293,
635
- "recall": 0.8767123287671232,
636
- "f1-score": 0.8490878938640133,
637
  "support": 292.0
638
  },
639
- "accuracy": 0.8805774278215223,
640
  "macro avg": {
641
- "precision": 0.8716642544969735,
642
- "recall": 0.8798455260856892,
643
- "f1-score": 0.8751411239135485,
644
  "support": 762.0
645
  },
646
  "weighted avg": {
647
- "precision": 0.8829967177238999,
648
- "recall": 0.8805774278215223,
649
- "f1-score": 0.8812270490432298,
650
  "support": 762.0
651
  }
652
  },
653
- "roc_auc": 0.9439886330515885,
654
- "average_precision": 0.905280557562998
655
  },
656
  "test_optimal_threshold": {
657
- "threshold": 0.5806344747543335,
658
- "accuracy": 0.8805774278215223,
659
- "precision": 0.8295081967213115,
660
- "recall": 0.8664383561643836,
661
- "f1": 0.847571189279732,
662
  "confusion_matrix": [
663
  [
664
- 418,
665
- 52
666
  ],
667
  [
668
- 39,
669
- 253
670
  ]
671
  ],
672
  "classification_report": {
673
  "NOT_RELEVANT": {
674
- "precision": 0.9146608315098468,
675
- "recall": 0.8893617021276595,
676
- "f1-score": 0.9018338727076591,
677
  "support": 470.0
678
  },
679
  "RELEVANT": {
680
- "precision": 0.8295081967213115,
681
- "recall": 0.8664383561643836,
682
- "f1-score": 0.847571189279732,
683
  "support": 292.0
684
  },
685
- "accuracy": 0.8805774278215223,
686
  "macro avg": {
687
- "precision": 0.8720845141155791,
688
- "recall": 0.8779000291460215,
689
- "f1-score": 0.8747025309936955,
690
  "support": 762.0
691
  },
692
  "weighted avg": {
693
- "precision": 0.8820301630606968,
694
- "recall": 0.8805774278215223,
695
- "f1-score": 0.8810402984806845,
696
  "support": 762.0
697
  }
698
  },
699
- "roc_auc": 0.9439886330515885,
700
- "average_precision": 0.905280557562998
701
  }
702
  }
703
  ]
 
1
  {
2
+ "created_at": "2026-05-29T18:13:08.384805+00:00",
3
  "config": {
4
  "hf_dataset": "faodl/amis-agri-wheat",
5
  "hf_subset": null,
6
+ "hf_revision": "main",
7
  "train_split": "train",
8
  "validation_split": "validation",
9
  "test_split": "test",
 
11
  "label_col": "label",
12
  "group_col": "id",
13
  "id_col": "chunk_id",
14
+ "model_name": "FacebookAI/xlm-roberta-base",
15
  "output_dir": "/content/agri-wheat-classifier",
16
  "max_length": 256,
17
  "learning_rate": 2e-05,
 
38
  "embedding_batch_size": 64,
39
  "positive_label_name": "RELEVANT",
40
  "negative_label_name": "NOT_RELEVANT",
41
+ "push_to_hub": false,
42
+ "hub_model_id": null,
43
  "hub_private_repo": false
44
  },
45
  "dataset_summary": {
 
86
  "precision": 0.7087087087087087,
87
  "recall": 0.8644688644688645
88
  },
89
+ "validation_default_0_5": {
90
+ "threshold": 0.5,
91
+ "accuracy": 0.8181818181818182,
92
+ "precision": 0.7184466019417476,
93
+ "recall": 0.8131868131868132,
94
+ "f1": 0.7628865979381443,
95
+ "confusion_matrix": [
96
+ [
97
+ 399,
98
+ 87
99
+ ],
100
+ [
101
+ 51,
102
+ 222
103
+ ]
104
+ ],
105
+ "classification_report": {
106
+ "NOT_RELEVANT": {
107
+ "precision": 0.8866666666666667,
108
+ "recall": 0.8209876543209876,
109
+ "f1-score": 0.8525641025641025,
110
+ "support": 486.0
111
+ },
112
+ "RELEVANT": {
113
+ "precision": 0.7184466019417476,
114
+ "recall": 0.8131868131868132,
115
+ "f1-score": 0.7628865979381443,
116
+ "support": 273.0
117
+ },
118
+ "accuracy": 0.8181818181818182,
119
+ "macro avg": {
120
+ "precision": 0.8025566343042072,
121
+ "recall": 0.8170872337539004,
122
+ "f1-score": 0.8077253502511235,
123
+ "support": 759.0
124
+ },
125
+ "weighted avg": {
126
+ "precision": 0.8261606354810239,
127
+ "recall": 0.8181818181818182,
128
+ "f1-score": 0.8203085574219594,
129
+ "support": 759.0
130
+ }
131
+ },
132
+ "roc_auc": 0.9070908515352959,
133
+ "average_precision": 0.8669161076776083
134
+ },
135
+ "validation_optimal_threshold": {
136
+ "threshold": 0.46961394282689456,
137
+ "accuracy": 0.8234519104084321,
138
+ "precision": 0.7087087087087087,
139
+ "recall": 0.8644688644688645,
140
+ "f1": 0.7788778877887789,
141
+ "confusion_matrix": [
142
+ [
143
+ 389,
144
+ 97
145
+ ],
146
+ [
147
+ 37,
148
+ 236
149
+ ]
150
+ ],
151
+ "classification_report": {
152
+ "NOT_RELEVANT": {
153
+ "precision": 0.9131455399061033,
154
+ "recall": 0.8004115226337448,
155
+ "f1-score": 0.8530701754385965,
156
+ "support": 486.0
157
+ },
158
+ "RELEVANT": {
159
+ "precision": 0.7087087087087087,
160
+ "recall": 0.8644688644688645,
161
+ "f1-score": 0.7788778877887789,
162
+ "support": 273.0
163
+ },
164
+ "accuracy": 0.8234519104084321,
165
+ "macro avg": {
166
+ "precision": 0.810927124307406,
167
+ "recall": 0.8324401935513046,
168
+ "f1-score": 0.8159740316136876,
169
+ "support": 759.0
170
+ },
171
+ "weighted avg": {
172
+ "precision": 0.8396129247323368,
173
+ "recall": 0.8234519104084321,
174
+ "f1-score": 0.8263844118965672,
175
+ "support": 759.0
176
+ }
177
+ },
178
+ "roc_auc": 0.9070908515352959,
179
+ "average_precision": 0.8669161076776083
180
+ },
181
  "test_default_0_5": {
182
  "threshold": 0.5,
183
  "accuracy": 0.8031496062992126,
 
282
  "precision": 0.8158844765342961,
283
  "recall": 0.8278388278388278
284
  },
285
+ "validation_default_0_5": {
286
+ "threshold": 0.5,
287
+ "accuracy": 0.8682476943346509,
288
+ "precision": 0.8078291814946619,
289
+ "recall": 0.8315018315018315,
290
+ "f1": 0.8194945848375451,
291
+ "confusion_matrix": [
292
+ [
293
+ 432,
294
+ 54
295
+ ],
296
+ [
297
+ 46,
298
+ 227
299
+ ]
300
+ ],
301
+ "classification_report": {
302
+ "NOT_RELEVANT": {
303
+ "precision": 0.9037656903765691,
304
+ "recall": 0.8888888888888888,
305
+ "f1-score": 0.8962655601659751,
306
+ "support": 486.0
307
+ },
308
+ "RELEVANT": {
309
+ "precision": 0.8078291814946619,
310
+ "recall": 0.8315018315018315,
311
+ "f1-score": 0.8194945848375451,
312
+ "support": 273.0
313
+ },
314
+ "accuracy": 0.8682476943346509,
315
+ "macro avg": {
316
+ "precision": 0.8557974359356155,
317
+ "recall": 0.8601953601953602,
318
+ "f1-score": 0.8578800725017601,
319
+ "support": 759.0
320
+ },
321
+ "weighted avg": {
322
+ "precision": 0.8692588828340648,
323
+ "recall": 0.8682476943346509,
324
+ "f1-score": 0.8686522844549588,
325
+ "support": 759.0
326
+ }
327
+ },
328
+ "roc_auc": 0.9346915087655827,
329
+ "average_precision": 0.8916128406855647
330
+ },
331
+ "validation_optimal_threshold": {
332
+ "threshold": 0.5195086002349854,
333
+ "accuracy": 0.8708827404479579,
334
+ "precision": 0.8158844765342961,
335
+ "recall": 0.8278388278388278,
336
+ "f1": 0.8218181818181818,
337
+ "confusion_matrix": [
338
+ [
339
+ 435,
340
+ 51
341
+ ],
342
+ [
343
+ 47,
344
+ 226
345
+ ]
346
+ ],
347
+ "classification_report": {
348
+ "NOT_RELEVANT": {
349
+ "precision": 0.9024896265560166,
350
+ "recall": 0.8950617283950617,
351
+ "f1-score": 0.8987603305785123,
352
+ "support": 486.0
353
+ },
354
+ "RELEVANT": {
355
+ "precision": 0.8158844765342961,
356
+ "recall": 0.8278388278388278,
357
+ "f1-score": 0.8218181818181818,
358
+ "support": 273.0
359
+ },
360
+ "accuracy": 0.8708827404479579,
361
+ "macro avg": {
362
+ "precision": 0.8591870515451563,
363
+ "recall": 0.8614502781169447,
364
+ "f1-score": 0.8602892561983471,
365
+ "support": 759.0
366
+ },
367
+ "weighted avg": {
368
+ "precision": 0.8713391575758721,
369
+ "recall": 0.8708827404479579,
370
+ "f1-score": 0.8710854865579982,
371
+ "support": 759.0
372
+ }
373
+ },
374
+ "roc_auc": 0.9346915087655827,
375
+ "average_precision": 0.8916128406855647
376
+ },
377
  "test_default_0_5": {
378
  "threshold": 0.5,
379
  "accuracy": 0.8346456692913385,
 
474
  "artifact_dir": "/content/agri-wheat-classifier/baselines/embedding-logistic",
475
  "artifact_file": "/content/agri-wheat-classifier/baselines/embedding-logistic/embedding-logistic.joblib",
476
  "validation_best_threshold": {
477
+ "threshold": 0.521271516877752,
478
+ "f1": 0.7363184079601991,
479
+ "precision": 0.6727272727272727,
480
+ "recall": 0.8131868131868132
481
+ },
482
+ "validation_default_0_5": {
483
+ "threshold": 0.5,
484
+ "accuracy": 0.782608695652174,
485
+ "precision": 0.6578947368421053,
486
+ "recall": 0.8241758241758241,
487
+ "f1": 0.7317073170731707,
488
+ "confusion_matrix": [
489
+ [
490
+ 369,
491
+ 117
492
+ ],
493
+ [
494
+ 48,
495
+ 225
496
+ ]
497
+ ],
498
+ "classification_report": {
499
+ "NOT_RELEVANT": {
500
+ "precision": 0.8848920863309353,
501
+ "recall": 0.7592592592592593,
502
+ "f1-score": 0.8172757475083057,
503
+ "support": 486.0
504
+ },
505
+ "RELEVANT": {
506
+ "precision": 0.6578947368421053,
507
+ "recall": 0.8241758241758241,
508
+ "f1-score": 0.7317073170731707,
509
+ "support": 273.0
510
+ },
511
+ "accuracy": 0.782608695652174,
512
+ "macro avg": {
513
+ "precision": 0.7713934115865203,
514
+ "recall": 0.7917175417175417,
515
+ "f1-score": 0.7744915322907382,
516
+ "support": 759.0
517
+ },
518
+ "weighted avg": {
519
+ "precision": 0.8032448183329766,
520
+ "recall": 0.782608695652174,
521
+ "f1-score": 0.7864981697628618,
522
+ "support": 759.0
523
+ }
524
+ },
525
+ "roc_auc": 0.8619213434028248,
526
+ "average_precision": 0.7798827839342581
527
+ },
528
+ "validation_optimal_threshold": {
529
+ "threshold": 0.521271516877752,
530
+ "accuracy": 0.7905138339920948,
531
+ "precision": 0.6727272727272727,
532
+ "recall": 0.8131868131868132,
533
+ "f1": 0.736318407960199,
534
+ "confusion_matrix": [
535
+ [
536
+ 378,
537
+ 108
538
+ ],
539
+ [
540
+ 51,
541
+ 222
542
+ ]
543
+ ],
544
+ "classification_report": {
545
+ "NOT_RELEVANT": {
546
+ "precision": 0.8811188811188811,
547
+ "recall": 0.7777777777777778,
548
+ "f1-score": 0.8262295081967214,
549
+ "support": 486.0
550
+ },
551
+ "RELEVANT": {
552
+ "precision": 0.6727272727272727,
553
+ "recall": 0.8131868131868132,
554
+ "f1-score": 0.736318407960199,
555
+ "support": 273.0
556
+ },
557
+ "accuracy": 0.7905138339920948,
558
+ "macro avg": {
559
+ "precision": 0.7769230769230769,
560
+ "recall": 0.7954822954822955,
561
+ "f1-score": 0.7812739580784602,
562
+ "support": 759.0
563
+ },
564
+ "weighted avg": {
565
+ "precision": 0.8061637966776306,
566
+ "recall": 0.7905138339920948,
567
+ "f1-score": 0.7938899424989999,
568
+ "support": 759.0
569
+ }
570
+ },
571
+ "roc_auc": 0.8619213434028248,
572
+ "average_precision": 0.7798827839342581
573
  },
574
  "test_default_0_5": {
575
  "threshold": 0.5,
576
+ "accuracy": 0.7821522309711286,
577
+ "precision": 0.6993670886075949,
578
+ "recall": 0.7568493150684932,
579
+ "f1": 0.7269736842105263,
580
  "confusion_matrix": [
581
  [
582
+ 375,
583
+ 95
584
  ],
585
  [
586
+ 71,
587
+ 221
588
  ]
589
  ],
590
  "classification_report": {
591
  "NOT_RELEVANT": {
592
+ "precision": 0.8408071748878924,
593
+ "recall": 0.7978723404255319,
594
+ "f1-score": 0.8187772925764192,
595
  "support": 470.0
596
  },
597
  "RELEVANT": {
598
+ "precision": 0.6993670886075949,
599
+ "recall": 0.7568493150684932,
600
+ "f1-score": 0.7269736842105263,
601
  "support": 292.0
602
  },
603
+ "accuracy": 0.7821522309711286,
604
  "macro avg": {
605
+ "precision": 0.7700871317477436,
606
+ "recall": 0.7773608277470125,
607
+ "f1-score": 0.7728754883934728,
608
  "support": 762.0
609
  },
610
  "weighted avg": {
611
+ "precision": 0.7866070368382246,
612
+ "recall": 0.7821522309711286,
613
+ "f1-score": 0.783597957087127,
614
  "support": 762.0
615
  }
616
  },
617
+ "roc_auc": 0.8769309239288836,
618
+ "average_precision": 0.8205790680541594
619
  },
620
  "test_optimal_threshold": {
621
+ "threshold": 0.521271516877752,
622
+ "accuracy": 0.7887139107611548,
623
+ "precision": 0.7133550488599348,
624
+ "recall": 0.75,
625
+ "f1": 0.7312186978297162,
626
  "confusion_matrix": [
627
  [
628
+ 382,
629
+ 88
630
  ],
631
  [
632
+ 73,
633
+ 219
634
  ]
635
  ],
636
  "classification_report": {
637
  "NOT_RELEVANT": {
638
+ "precision": 0.8395604395604396,
639
+ "recall": 0.8127659574468085,
640
+ "f1-score": 0.825945945945946,
641
  "support": 470.0
642
  },
643
  "RELEVANT": {
644
+ "precision": 0.7133550488599348,
645
+ "recall": 0.75,
646
+ "f1-score": 0.7312186978297162,
647
  "support": 292.0
648
  },
649
+ "accuracy": 0.7887139107611548,
650
  "macro avg": {
651
+ "precision": 0.7764577442101872,
652
+ "recall": 0.7813829787234042,
653
+ "f1-score": 0.778582321887831,
654
  "support": 762.0
655
  },
656
  "weighted avg": {
657
+ "precision": 0.7911982688458105,
658
+ "recall": 0.7887139107611548,
659
+ "f1-score": 0.7896462655654485,
660
  "support": 762.0
661
  }
662
  },
663
+ "roc_auc": 0.8769309239288836,
664
+ "average_precision": 0.8205790680541594
665
  }
666
  },
667
  {
 
671
  "artifact_dir": "/content/agri-wheat-classifier/baselines/embedding-svm",
672
  "artifact_file": "/content/agri-wheat-classifier/baselines/embedding-svm/embedding-svm.joblib",
673
  "validation_best_threshold": {
674
+ "threshold": 0.47291079297616506,
675
+ "f1": 0.7448275862068966,
676
+ "precision": 0.7035830618892508,
677
+ "recall": 0.7912087912087912
678
+ },
679
+ "validation_default_0_5": {
680
+ "threshold": 0.5,
681
+ "accuracy": 0.8036890645586298,
682
+ "precision": 0.7137931034482758,
683
+ "recall": 0.7582417582417582,
684
+ "f1": 0.7353463587921847,
685
+ "confusion_matrix": [
686
+ [
687
+ 403,
688
+ 83
689
+ ],
690
+ [
691
+ 66,
692
+ 207
693
+ ]
694
+ ],
695
+ "classification_report": {
696
+ "NOT_RELEVANT": {
697
+ "precision": 0.8592750533049041,
698
+ "recall": 0.8292181069958847,
699
+ "f1-score": 0.8439790575916231,
700
+ "support": 486.0
701
+ },
702
+ "RELEVANT": {
703
+ "precision": 0.7137931034482758,
704
+ "recall": 0.7582417582417582,
705
+ "f1-score": 0.7353463587921847,
706
+ "support": 273.0
707
+ },
708
+ "accuracy": 0.8036890645586298,
709
+ "macro avg": {
710
+ "precision": 0.78653407837659,
711
+ "recall": 0.7937299326188214,
712
+ "f1-score": 0.7896627081919039,
713
+ "support": 759.0
714
+ },
715
+ "weighted avg": {
716
+ "precision": 0.8069475535541011,
717
+ "recall": 0.8036890645586298,
718
+ "f1-score": 0.8049056362843152,
719
+ "support": 759.0
720
+ }
721
+ },
722
+ "roc_auc": 0.8689835541687394,
723
+ "average_precision": 0.7918617140757781
724
+ },
725
+ "validation_optimal_threshold": {
726
+ "threshold": 0.47291079297616506,
727
+ "accuracy": 0.8050065876152833,
728
+ "precision": 0.7035830618892508,
729
+ "recall": 0.7912087912087912,
730
+ "f1": 0.7448275862068966,
731
+ "confusion_matrix": [
732
+ [
733
+ 395,
734
+ 91
735
+ ],
736
+ [
737
+ 57,
738
+ 216
739
+ ]
740
+ ],
741
+ "classification_report": {
742
+ "NOT_RELEVANT": {
743
+ "precision": 0.8738938053097345,
744
+ "recall": 0.8127572016460906,
745
+ "f1-score": 0.8422174840085288,
746
+ "support": 486.0
747
+ },
748
+ "RELEVANT": {
749
+ "precision": 0.7035830618892508,
750
+ "recall": 0.7912087912087912,
751
+ "f1-score": 0.7448275862068966,
752
+ "support": 273.0
753
+ },
754
+ "accuracy": 0.8050065876152833,
755
+ "macro avg": {
756
+ "precision": 0.7887384335994927,
757
+ "recall": 0.8019829964274409,
758
+ "f1-score": 0.7935225351077126,
759
+ "support": 759.0
760
+ },
761
+ "weighted avg": {
762
+ "precision": 0.8126357908778609,
763
+ "recall": 0.8050065876152833,
764
+ "f1-score": 0.8071879160245424,
765
+ "support": 759.0
766
+ }
767
+ },
768
+ "roc_auc": 0.8689835541687394,
769
+ "average_precision": 0.7918617140757781
770
  },
771
  "test_default_0_5": {
772
  "threshold": 0.5,
773
+ "accuracy": 0.8175853018372703,
774
+ "precision": 0.7781818181818182,
775
+ "recall": 0.7328767123287672,
776
+ "f1": 0.7548500881834215,
777
  "confusion_matrix": [
778
  [
779
+ 409,
780
+ 61
781
  ],
782
  [
783
+ 78,
784
+ 214
785
  ]
786
  ],
787
  "classification_report": {
788
  "NOT_RELEVANT": {
789
+ "precision": 0.839835728952772,
790
+ "recall": 0.8702127659574468,
791
+ "f1-score": 0.8547544409613375,
792
  "support": 470.0
793
  },
794
  "RELEVANT": {
795
+ "precision": 0.7781818181818182,
796
+ "recall": 0.7328767123287672,
797
+ "f1-score": 0.7548500881834215,
798
  "support": 292.0
799
  },
800
+ "accuracy": 0.8175853018372703,
801
  "macro avg": {
802
+ "precision": 0.8090087735672951,
803
+ "recall": 0.801544739143107,
804
+ "f1-score": 0.8048022645723795,
805
  "support": 762.0
806
  },
807
  "weighted avg": {
808
+ "precision": 0.8162098208883121,
809
+ "recall": 0.8175853018372703,
810
+ "f1-score": 0.8164708832039208,
811
  "support": 762.0
812
  }
813
  },
814
+ "roc_auc": 0.8834814922763043,
815
+ "average_precision": 0.8241931351812108
816
  },
817
  "test_optimal_threshold": {
818
+ "threshold": 0.47291079297616506,
819
+ "accuracy": 0.8123359580052494,
820
+ "precision": 0.7577854671280276,
821
+ "recall": 0.75,
822
+ "f1": 0.7538726333907056,
823
  "confusion_matrix": [
824
  [
825
+ 400,
826
+ 70
827
  ],
828
  [
829
+ 73,
830
+ 219
831
  ]
832
  ],
833
  "classification_report": {
834
  "NOT_RELEVANT": {
835
+ "precision": 0.8456659619450317,
836
+ "recall": 0.851063829787234,
837
+ "f1-score": 0.848356309650053,
838
  "support": 470.0
839
  },
840
  "RELEVANT": {
841
+ "precision": 0.7577854671280276,
842
+ "recall": 0.75,
843
+ "f1-score": 0.7538726333907056,
844
  "support": 292.0
845
  },
846
+ "accuracy": 0.8123359580052494,
847
  "macro avg": {
848
+ "precision": 0.8017257145365297,
849
+ "recall": 0.800531914893617,
850
+ "f1-score": 0.8011144715203793,
851
  "support": 762.0
852
  },
853
  "weighted avg": {
854
+ "precision": 0.8119899718051824,
855
+ "recall": 0.8123359580052494,
856
+ "f1-score": 0.8121499665165498,
857
  "support": 762.0
858
  }
859
  },
860
+ "roc_auc": 0.8834814922763043,
861
+ "average_precision": 0.8241931351812108
862
  }
863
  },
864
  {
 
868
  "artifact_dir": "/content/agri-wheat-classifier/baselines/embedding-lightgbm",
869
  "artifact_file": "/content/agri-wheat-classifier/baselines/embedding-lightgbm/embedding-lightgbm.joblib",
870
  "validation_best_threshold": {
871
+ "threshold": 0.4330901925282479,
872
+ "f1": 0.7406143344709898,
873
+ "precision": 0.6932907348242812,
874
+ "recall": 0.7948717948717948
875
+ },
876
+ "validation_default_0_5": {
877
+ "threshold": 0.5,
878
+ "accuracy": 0.7905138339920948,
879
+ "precision": 0.6938775510204082,
880
+ "recall": 0.7472527472527473,
881
+ "f1": 0.7195767195767195,
882
+ "confusion_matrix": [
883
+ [
884
+ 396,
885
+ 90
886
+ ],
887
+ [
888
+ 69,
889
+ 204
890
+ ]
891
+ ],
892
+ "classification_report": {
893
+ "NOT_RELEVANT": {
894
+ "precision": 0.8516129032258064,
895
+ "recall": 0.8148148148148148,
896
+ "f1-score": 0.832807570977918,
897
+ "support": 486.0
898
+ },
899
+ "RELEVANT": {
900
+ "precision": 0.6938775510204082,
901
+ "recall": 0.7472527472527473,
902
+ "f1-score": 0.7195767195767195,
903
+ "support": 273.0
904
+ },
905
+ "accuracy": 0.7905138339920948,
906
+ "macro avg": {
907
+ "precision": 0.7727452271231072,
908
+ "recall": 0.7810337810337811,
909
+ "f1-score": 0.7761921452773188,
910
+ "support": 759.0
911
+ },
912
+ "weighted avg": {
913
+ "precision": 0.7948780532230743,
914
+ "recall": 0.7905138339920948,
915
+ "f1-score": 0.7920802686952735,
916
+ "support": 759.0
917
+ }
918
+ },
919
+ "roc_auc": 0.8684408869594056,
920
+ "average_precision": 0.7857728796203002
921
+ },
922
+ "validation_optimal_threshold": {
923
+ "threshold": 0.4330901925282479,
924
+ "accuracy": 0.7997364953886693,
925
+ "precision": 0.6932907348242812,
926
+ "recall": 0.7948717948717948,
927
+ "f1": 0.7406143344709898,
928
+ "confusion_matrix": [
929
+ [
930
+ 390,
931
+ 96
932
+ ],
933
+ [
934
+ 56,
935
+ 217
936
+ ]
937
+ ],
938
+ "classification_report": {
939
+ "NOT_RELEVANT": {
940
+ "precision": 0.874439461883408,
941
+ "recall": 0.8024691358024691,
942
+ "f1-score": 0.8369098712446352,
943
+ "support": 486.0
944
+ },
945
+ "RELEVANT": {
946
+ "precision": 0.6932907348242812,
947
+ "recall": 0.7948717948717948,
948
+ "f1-score": 0.7406143344709898,
949
+ "support": 273.0
950
+ },
951
+ "accuracy": 0.7997364953886693,
952
+ "macro avg": {
953
+ "precision": 0.7838650983538447,
954
+ "recall": 0.7986704653371319,
955
+ "f1-score": 0.7887621028578125,
956
+ "support": 759.0
957
+ },
958
+ "weighted avg": {
959
+ "precision": 0.8092832003720225,
960
+ "recall": 0.7997364953886693,
961
+ "f1-score": 0.8022739271877114,
962
+ "support": 759.0
963
+ }
964
+ },
965
+ "roc_auc": 0.8684408869594056,
966
+ "average_precision": 0.7857728796203002
967
  },
968
  "test_default_0_5": {
969
  "threshold": 0.5,
970
+ "accuracy": 0.7979002624671916,
971
+ "precision": 0.7395833333333334,
972
+ "recall": 0.7294520547945206,
973
+ "f1": 0.7344827586206897,
974
  "confusion_matrix": [
975
  [
976
+ 395,
977
+ 75
978
  ],
979
  [
980
+ 79,
981
+ 213
982
  ]
983
  ],
984
  "classification_report": {
985
  "NOT_RELEVANT": {
986
+ "precision": 0.8333333333333334,
987
+ "recall": 0.8404255319148937,
988
+ "f1-score": 0.836864406779661,
989
  "support": 470.0
990
  },
991
  "RELEVANT": {
992
+ "precision": 0.7395833333333334,
993
+ "recall": 0.7294520547945206,
994
+ "f1-score": 0.7344827586206897,
995
  "support": 292.0
996
  },
997
+ "accuracy": 0.7979002624671916,
998
  "macro avg": {
999
+ "precision": 0.7864583333333334,
1000
+ "recall": 0.7849387933547072,
1001
+ "f1-score": 0.7856735827001753,
1002
  "support": 762.0
1003
  },
1004
  "weighted avg": {
1005
+ "precision": 0.7974081364829396,
1006
+ "recall": 0.7979002624671916,
1007
+ "f1-score": 0.7976315442305539,
1008
  "support": 762.0
1009
  }
1010
  },
1011
+ "roc_auc": 0.8918245409501604,
1012
+ "average_precision": 0.8474870908097023
1013
  },
1014
  "test_optimal_threshold": {
1015
+ "threshold": 0.4330901925282479,
1016
  "accuracy": 0.8057742782152231,
1017
+ "precision": 0.7352941176470589,
1018
+ "recall": 0.7705479452054794,
1019
+ "f1": 0.7525083612040134,
1020
  "confusion_matrix": [
1021
  [
1022
+ 389,
1023
+ 81
1024
  ],
1025
  [
1026
+ 67,
1027
+ 225
1028
  ]
1029
  ],
1030
  "classification_report": {
1031
  "NOT_RELEVANT": {
1032
+ "precision": 0.8530701754385965,
1033
+ "recall": 0.8276595744680851,
1034
+ "f1-score": 0.8401727861771058,
1035
  "support": 470.0
1036
  },
1037
  "RELEVANT": {
1038
+ "precision": 0.7352941176470589,
1039
+ "recall": 0.7705479452054794,
1040
+ "f1-score": 0.7525083612040134,
1041
  "support": 292.0
1042
  },
1043
  "accuracy": 0.8057742782152231,
1044
  "macro avg": {
1045
+ "precision": 0.7941821465428277,
1046
+ "recall": 0.7991037598367823,
1047
+ "f1-score": 0.7963405736905596,
1048
  "support": 762.0
1049
  },
1050
  "weighted avg": {
1051
+ "precision": 0.8079381427940704,
1052
  "recall": 0.8057742782152231,
1053
+ "f1-score": 0.8065795944551334,
1054
  "support": 762.0
1055
  }
1056
  },
1057
+ "roc_auc": 0.8918245409501604,
1058
+ "average_precision": 0.8474870908097023
1059
  }
1060
  },
1061
  {
1062
  "model_type": "transformer",
1063
+ "model_name": "FacebookAI/xlm-roberta-base",
1064
  "artifact_dir": "/content/agri-wheat-classifier/transformer",
1065
  "validation_best_threshold": {
1066
+ "threshold": 0.2030746340751648,
1067
+ "f1": 0.8992805755395683,
1068
+ "precision": 0.8833922261484098,
1069
+ "recall": 0.9157509157509157
1070
+ },
1071
+ "validation_default_0_5": {
1072
+ "threshold": 0.5,
1073
+ "accuracy": 0.924901185770751,
1074
+ "precision": 0.8941605839416058,
1075
+ "recall": 0.8974358974358975,
1076
+ "f1": 0.8957952468007313,
1077
+ "confusion_matrix": [
1078
+ [
1079
+ 457,
1080
+ 29
1081
+ ],
1082
+ [
1083
+ 28,
1084
+ 245
1085
+ ]
1086
+ ],
1087
+ "classification_report": {
1088
+ "NOT_RELEVANT": {
1089
+ "precision": 0.9422680412371134,
1090
+ "recall": 0.9403292181069959,
1091
+ "f1-score": 0.94129763130793,
1092
+ "support": 486.0
1093
+ },
1094
+ "RELEVANT": {
1095
+ "precision": 0.8941605839416058,
1096
+ "recall": 0.8974358974358975,
1097
+ "f1-score": 0.8957952468007313,
1098
+ "support": 273.0
1099
+ },
1100
+ "accuracy": 0.924901185770751,
1101
+ "macro avg": {
1102
+ "precision": 0.9182143125893596,
1103
+ "recall": 0.9188825577714467,
1104
+ "f1-score": 0.9185464390543306,
1105
+ "support": 759.0
1106
+ },
1107
+ "weighted avg": {
1108
+ "precision": 0.9249645684549348,
1109
+ "recall": 0.924901185770751,
1110
+ "f1-score": 0.9249311609911115,
1111
+ "support": 759.0
1112
+ }
1113
+ },
1114
+ "roc_auc": 0.9563906600943638,
1115
+ "average_precision": 0.914270669615161
1116
+ },
1117
+ "validation_optimal_threshold": {
1118
+ "threshold": 0.2030746340751648,
1119
+ "accuracy": 0.9262187088274044,
1120
+ "precision": 0.8833922261484098,
1121
+ "recall": 0.9157509157509157,
1122
+ "f1": 0.8992805755395683,
1123
+ "confusion_matrix": [
1124
+ [
1125
+ 453,
1126
+ 33
1127
+ ],
1128
+ [
1129
+ 23,
1130
+ 250
1131
+ ]
1132
+ ],
1133
+ "classification_report": {
1134
+ "NOT_RELEVANT": {
1135
+ "precision": 0.9516806722689075,
1136
+ "recall": 0.9320987654320988,
1137
+ "f1-score": 0.9417879417879418,
1138
+ "support": 486.0
1139
+ },
1140
+ "RELEVANT": {
1141
+ "precision": 0.8833922261484098,
1142
+ "recall": 0.9157509157509157,
1143
+ "f1-score": 0.8992805755395683,
1144
+ "support": 273.0
1145
+ },
1146
+ "accuracy": 0.9262187088274044,
1147
+ "macro avg": {
1148
+ "precision": 0.9175364492086586,
1149
+ "recall": 0.9239248405915073,
1150
+ "f1-score": 0.920534258663755,
1151
+ "support": 759.0
1152
+ },
1153
+ "weighted avg": {
1154
+ "precision": 0.9271184248500723,
1155
+ "recall": 0.9262187088274044,
1156
+ "f1-score": 0.9264987310029535,
1157
+ "support": 759.0
1158
+ }
1159
+ },
1160
+ "roc_auc": 0.9563906600943638,
1161
+ "average_precision": 0.914270669615161
1162
  },
1163
  "test_default_0_5": {
1164
  "threshold": 0.5,
1165
+ "accuracy": 0.884514435695538,
1166
+ "precision": 0.8617021276595744,
1167
+ "recall": 0.8321917808219178,
1168
+ "f1": 0.8466898954703833,
1169
  "confusion_matrix": [
1170
  [
1171
+ 431,
1172
+ 39
1173
  ],
1174
  [
1175
+ 49,
1176
+ 243
1177
  ]
1178
  ],
1179
  "classification_report": {
1180
  "NOT_RELEVANT": {
1181
+ "precision": 0.8979166666666667,
1182
+ "recall": 0.9170212765957447,
1183
+ "f1-score": 0.9073684210526316,
1184
  "support": 470.0
1185
  },
1186
  "RELEVANT": {
1187
+ "precision": 0.8617021276595744,
1188
+ "recall": 0.8321917808219178,
1189
+ "f1-score": 0.8466898954703833,
1190
  "support": 292.0
1191
  },
1192
+ "accuracy": 0.884514435695538,
1193
  "macro avg": {
1194
+ "precision": 0.8798093971631206,
1195
+ "recall": 0.8746065287088313,
1196
+ "f1-score": 0.8770291582615075,
1197
  "support": 762.0
1198
  },
1199
  "weighted avg": {
1200
+ "precision": 0.8840391792781221,
1201
+ "recall": 0.884514435695538,
1202
+ "f1-score": 0.8841162826405365,
1203
  "support": 762.0
1204
  }
1205
  },
1206
+ "roc_auc": 0.9430960361410667,
1207
+ "average_precision": 0.9145930239314757
1208
  },
1209
  "test_optimal_threshold": {
1210
+ "threshold": 0.2030746340751648,
1211
+ "accuracy": 0.889763779527559,
1212
+ "precision": 0.8537414965986394,
1213
+ "recall": 0.8595890410958904,
1214
+ "f1": 0.856655290102389,
1215
  "confusion_matrix": [
1216
  [
1217
+ 427,
1218
+ 43
1219
  ],
1220
  [
1221
+ 41,
1222
+ 251
1223
  ]
1224
  ],
1225
  "classification_report": {
1226
  "NOT_RELEVANT": {
1227
+ "precision": 0.9123931623931624,
1228
+ "recall": 0.9085106382978724,
1229
+ "f1-score": 0.9104477611940298,
1230
  "support": 470.0
1231
  },
1232
  "RELEVANT": {
1233
+ "precision": 0.8537414965986394,
1234
+ "recall": 0.8595890410958904,
1235
+ "f1-score": 0.856655290102389,
1236
  "support": 292.0
1237
  },
1238
+ "accuracy": 0.889763779527559,
1239
  "macro avg": {
1240
+ "precision": 0.8830673294959008,
1241
+ "recall": 0.8840498396968814,
1242
+ "f1-score": 0.8835515256482094,
1243
  "support": 762.0
1244
  },
1245
  "weighted avg": {
1246
+ "precision": 0.8899177209075971,
1247
+ "recall": 0.889763779527559,
1248
+ "f1-score": 0.8898343733216425,
1249
  "support": 762.0
1250
  }
1251
  },
1252
+ "roc_auc": 0.9430960361410667,
1253
+ "average_precision": 0.9145930239314757
1254
  }
1255
  }
1256
  ]
transformer/checkpoint-1135/config.json ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_cross_attention": false,
3
+ "architectures": [
4
+ "XLMRobertaForSequenceClassification"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.1,
7
+ "bos_token_id": 0,
8
+ "classifier_dropout": null,
9
+ "dtype": "float32",
10
+ "eos_token_id": 2,
11
+ "hidden_act": "gelu",
12
+ "hidden_dropout_prob": 0.1,
13
+ "hidden_size": 768,
14
+ "id2label": {
15
+ "0": "NOT_RELEVANT",
16
+ "1": "RELEVANT"
17
+ },
18
+ "initializer_range": 0.02,
19
+ "intermediate_size": 3072,
20
+ "is_decoder": false,
21
+ "label2id": {
22
+ "NOT_RELEVANT": 0,
23
+ "RELEVANT": 1
24
+ },
25
+ "layer_norm_eps": 1e-05,
26
+ "max_position_embeddings": 514,
27
+ "model_type": "xlm-roberta",
28
+ "num_attention_heads": 12,
29
+ "num_hidden_layers": 12,
30
+ "output_past": true,
31
+ "pad_token_id": 1,
32
+ "position_embedding_type": "absolute",
33
+ "problem_type": "single_label_classification",
34
+ "tie_word_embeddings": true,
35
+ "transformers_version": "5.9.0",
36
+ "type_vocab_size": 1,
37
+ "use_cache": false,
38
+ "vocab_size": 250002
39
+ }
transformer/checkpoint-1135/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:49e9acb9da03ba022fefe40028a1df7f7fa4ccef569677e26812cafd225ef8ae
3
+ size 1112205008
transformer/checkpoint-1135/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4cb1db50ca28d52ccaa8a7b730432372aa61a6a1769b32667328207c647b3a82
3
+ size 2224532875
transformer/checkpoint-1135/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a7ce43043e72e4492ea95f56e147b44a9c5f8c96c4094efb5609bed070ff06c7
3
+ size 14645
transformer/checkpoint-1135/scaler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3e3e475419c2177afcc69f2e211ad481cd1b96ff1a6e3f24aee6b25a1983c6e1
3
+ size 1383
transformer/checkpoint-1135/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:06462f15ba58fbb78a4dc0cc4e0eca4e82f972fc107ff3f7ac0f4c4d6c6eba6e
3
+ size 1465
transformer/checkpoint-1135/tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cc02d42fb2a10276563109e2287cc0dbe6b595d5b3b3401c7cfeffc0b7e20270
3
+ size 17098351
transformer/checkpoint-1135/tokenizer_config.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_prefix_space": true,
3
+ "backend": "tokenizers",
4
+ "bos_token": "<s>",
5
+ "cls_token": "<s>",
6
+ "eos_token": "</s>",
7
+ "is_local": false,
8
+ "local_files_only": false,
9
+ "mask_token": "<mask>",
10
+ "model_max_length": 512,
11
+ "pad_token": "<pad>",
12
+ "sep_token": "</s>",
13
+ "tokenizer_class": "XLMRobertaTokenizer",
14
+ "unk_token": "<unk>"
15
+ }
transformer/checkpoint-1135/trainer_state.json ADDED
@@ -0,0 +1,423 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_global_step": 908,
3
+ "best_metric": 0.8957952468007313,
4
+ "best_model_checkpoint": "/content/agri-wheat-classifier/transformer/checkpoint-908",
5
+ "epoch": 5.0,
6
+ "eval_steps": 500,
7
+ "global_step": 1135,
8
+ "is_hyper_param_search": false,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "epoch": 0.11013215859030837,
14
+ "grad_norm": 3.4615602493286133,
15
+ "learning_rate": 4.247787610619469e-06,
16
+ "loss": 0.7144061279296875,
17
+ "step": 25
18
+ },
19
+ {
20
+ "epoch": 0.22026431718061673,
21
+ "grad_norm": 6.50156307220459,
22
+ "learning_rate": 8.672566371681418e-06,
23
+ "loss": 0.6882943725585937,
24
+ "step": 50
25
+ },
26
+ {
27
+ "epoch": 0.3303964757709251,
28
+ "grad_norm": 4.099428176879883,
29
+ "learning_rate": 1.3097345132743363e-05,
30
+ "loss": 0.6711004638671875,
31
+ "step": 75
32
+ },
33
+ {
34
+ "epoch": 0.44052863436123346,
35
+ "grad_norm": 7.663174629211426,
36
+ "learning_rate": 1.7522123893805313e-05,
37
+ "loss": 0.681163330078125,
38
+ "step": 100
39
+ },
40
+ {
41
+ "epoch": 0.5506607929515418,
42
+ "grad_norm": 10.003649711608887,
43
+ "learning_rate": 1.9784735812133072e-05,
44
+ "loss": 0.672431640625,
45
+ "step": 125
46
+ },
47
+ {
48
+ "epoch": 0.6607929515418502,
49
+ "grad_norm": 4.136000633239746,
50
+ "learning_rate": 1.929549902152642e-05,
51
+ "loss": 0.6405279541015625,
52
+ "step": 150
53
+ },
54
+ {
55
+ "epoch": 0.7709251101321586,
56
+ "grad_norm": 8.746106147766113,
57
+ "learning_rate": 1.8806262230919768e-05,
58
+ "loss": 0.6329603576660157,
59
+ "step": 175
60
+ },
61
+ {
62
+ "epoch": 0.8810572687224669,
63
+ "grad_norm": 6.302291393280029,
64
+ "learning_rate": 1.8317025440313113e-05,
65
+ "loss": 0.48342845916748045,
66
+ "step": 200
67
+ },
68
+ {
69
+ "epoch": 0.9911894273127754,
70
+ "grad_norm": 9.206014633178711,
71
+ "learning_rate": 1.7827788649706457e-05,
72
+ "loss": 0.498061408996582,
73
+ "step": 225
74
+ },
75
+ {
76
+ "epoch": 1.0,
77
+ "eval_accuracy": 0.8880105401844532,
78
+ "eval_f1": 0.8434622467771639,
79
+ "eval_loss": 0.3247315287590027,
80
+ "eval_precision": 0.8481481481481481,
81
+ "eval_recall": 0.8388278388278388,
82
+ "eval_roc_auc": 0.9395378284267173,
83
+ "eval_runtime": 2.8582,
84
+ "eval_samples_per_second": 265.551,
85
+ "eval_steps_per_second": 8.397,
86
+ "step": 227
87
+ },
88
+ {
89
+ "epoch": 1.1013215859030836,
90
+ "grad_norm": 36.54235076904297,
91
+ "learning_rate": 1.7338551859099805e-05,
92
+ "loss": 0.34838932037353515,
93
+ "step": 250
94
+ },
95
+ {
96
+ "epoch": 1.2114537444933922,
97
+ "grad_norm": 11.171507835388184,
98
+ "learning_rate": 1.6849315068493153e-05,
99
+ "loss": 0.4250565719604492,
100
+ "step": 275
101
+ },
102
+ {
103
+ "epoch": 1.3215859030837005,
104
+ "grad_norm": 18.83371353149414,
105
+ "learning_rate": 1.6360078277886498e-05,
106
+ "loss": 0.3523222351074219,
107
+ "step": 300
108
+ },
109
+ {
110
+ "epoch": 1.4317180616740088,
111
+ "grad_norm": 12.578425407409668,
112
+ "learning_rate": 1.5870841487279843e-05,
113
+ "loss": 0.3326843643188477,
114
+ "step": 325
115
+ },
116
+ {
117
+ "epoch": 1.5418502202643172,
118
+ "grad_norm": 4.249295711517334,
119
+ "learning_rate": 1.538160469667319e-05,
120
+ "loss": 0.4222240447998047,
121
+ "step": 350
122
+ },
123
+ {
124
+ "epoch": 1.6519823788546255,
125
+ "grad_norm": 12.961797714233398,
126
+ "learning_rate": 1.4892367906066539e-05,
127
+ "loss": 0.3197017669677734,
128
+ "step": 375
129
+ },
130
+ {
131
+ "epoch": 1.7621145374449338,
132
+ "grad_norm": 15.027281761169434,
133
+ "learning_rate": 1.4403131115459884e-05,
134
+ "loss": 0.3585982894897461,
135
+ "step": 400
136
+ },
137
+ {
138
+ "epoch": 1.8722466960352424,
139
+ "grad_norm": 5.844873428344727,
140
+ "learning_rate": 1.391389432485323e-05,
141
+ "loss": 0.3265089416503906,
142
+ "step": 425
143
+ },
144
+ {
145
+ "epoch": 1.9823788546255505,
146
+ "grad_norm": 17.369247436523438,
147
+ "learning_rate": 1.3424657534246576e-05,
148
+ "loss": 0.30590789794921874,
149
+ "step": 450
150
+ },
151
+ {
152
+ "epoch": 2.0,
153
+ "eval_accuracy": 0.8880105401844532,
154
+ "eval_f1": 0.8571428571428571,
155
+ "eval_loss": 0.2787843644618988,
156
+ "eval_precision": 0.7919254658385093,
157
+ "eval_recall": 0.9340659340659341,
158
+ "eval_roc_auc": 0.959367792701126,
159
+ "eval_runtime": 2.7537,
160
+ "eval_samples_per_second": 275.624,
161
+ "eval_steps_per_second": 8.715,
162
+ "step": 454
163
+ },
164
+ {
165
+ "epoch": 2.092511013215859,
166
+ "grad_norm": 10.165802955627441,
167
+ "learning_rate": 1.2935420743639924e-05,
168
+ "loss": 0.3099861145019531,
169
+ "step": 475
170
+ },
171
+ {
172
+ "epoch": 2.202643171806167,
173
+ "grad_norm": 12.579766273498535,
174
+ "learning_rate": 1.2446183953033269e-05,
175
+ "loss": 0.24645326614379884,
176
+ "step": 500
177
+ },
178
+ {
179
+ "epoch": 2.3127753303964758,
180
+ "grad_norm": 16.500667572021484,
181
+ "learning_rate": 1.1956947162426615e-05,
182
+ "loss": 0.32091796875,
183
+ "step": 525
184
+ },
185
+ {
186
+ "epoch": 2.4229074889867843,
187
+ "grad_norm": 48.79829788208008,
188
+ "learning_rate": 1.1467710371819962e-05,
189
+ "loss": 0.28792179107666016,
190
+ "step": 550
191
+ },
192
+ {
193
+ "epoch": 2.5330396475770924,
194
+ "grad_norm": 3.1696665287017822,
195
+ "learning_rate": 1.097847358121331e-05,
196
+ "loss": 0.20424072265625,
197
+ "step": 575
198
+ },
199
+ {
200
+ "epoch": 2.643171806167401,
201
+ "grad_norm": 5.296319007873535,
202
+ "learning_rate": 1.0489236790606654e-05,
203
+ "loss": 0.3179521179199219,
204
+ "step": 600
205
+ },
206
+ {
207
+ "epoch": 2.753303964757709,
208
+ "grad_norm": 2.286626100540161,
209
+ "learning_rate": 1e-05,
210
+ "loss": 0.20801780700683595,
211
+ "step": 625
212
+ },
213
+ {
214
+ "epoch": 2.8634361233480177,
215
+ "grad_norm": 18.524463653564453,
216
+ "learning_rate": 9.510763209393347e-06,
217
+ "loss": 0.2919172477722168,
218
+ "step": 650
219
+ },
220
+ {
221
+ "epoch": 2.9735682819383262,
222
+ "grad_norm": 11.000245094299316,
223
+ "learning_rate": 9.021526418786694e-06,
224
+ "loss": 0.2509865570068359,
225
+ "step": 675
226
+ },
227
+ {
228
+ "epoch": 3.0,
229
+ "eval_accuracy": 0.9117259552042161,
230
+ "eval_f1": 0.8809946714031972,
231
+ "eval_loss": 0.2792136073112488,
232
+ "eval_precision": 0.8551724137931035,
233
+ "eval_recall": 0.9084249084249084,
234
+ "eval_roc_auc": 0.9613387298572483,
235
+ "eval_runtime": 2.7576,
236
+ "eval_samples_per_second": 275.236,
237
+ "eval_steps_per_second": 8.703,
238
+ "step": 681
239
+ },
240
+ {
241
+ "epoch": 3.0837004405286343,
242
+ "grad_norm": 7.4162092208862305,
243
+ "learning_rate": 8.53228962818004e-06,
244
+ "loss": 0.2622438621520996,
245
+ "step": 700
246
+ },
247
+ {
248
+ "epoch": 3.193832599118943,
249
+ "grad_norm": 28.406911849975586,
250
+ "learning_rate": 8.043052837573386e-06,
251
+ "loss": 0.25699047088623045,
252
+ "step": 725
253
+ },
254
+ {
255
+ "epoch": 3.303964757709251,
256
+ "grad_norm": 6.7196173667907715,
257
+ "learning_rate": 7.553816046966732e-06,
258
+ "loss": 0.24291038513183594,
259
+ "step": 750
260
+ },
261
+ {
262
+ "epoch": 3.4140969162995596,
263
+ "grad_norm": 2.39223575592041,
264
+ "learning_rate": 7.064579256360079e-06,
265
+ "loss": 0.19176918029785156,
266
+ "step": 775
267
+ },
268
+ {
269
+ "epoch": 3.5242290748898677,
270
+ "grad_norm": 0.3074190616607666,
271
+ "learning_rate": 6.5753424657534245e-06,
272
+ "loss": 0.1717354965209961,
273
+ "step": 800
274
+ },
275
+ {
276
+ "epoch": 3.6343612334801763,
277
+ "grad_norm": 38.88882064819336,
278
+ "learning_rate": 6.086105675146772e-06,
279
+ "loss": 0.21332130432128907,
280
+ "step": 825
281
+ },
282
+ {
283
+ "epoch": 3.744493392070485,
284
+ "grad_norm": 33.53501510620117,
285
+ "learning_rate": 5.596868884540117e-06,
286
+ "loss": 0.27872785568237307,
287
+ "step": 850
288
+ },
289
+ {
290
+ "epoch": 3.854625550660793,
291
+ "grad_norm": 6.368429183959961,
292
+ "learning_rate": 5.1076320939334645e-06,
293
+ "loss": 0.1715443801879883,
294
+ "step": 875
295
+ },
296
+ {
297
+ "epoch": 3.964757709251101,
298
+ "grad_norm": 15.106273651123047,
299
+ "learning_rate": 4.61839530332681e-06,
300
+ "loss": 0.1291443634033203,
301
+ "step": 900
302
+ },
303
+ {
304
+ "epoch": 4.0,
305
+ "eval_accuracy": 0.924901185770751,
306
+ "eval_f1": 0.8957952468007313,
307
+ "eval_loss": 0.309478223323822,
308
+ "eval_precision": 0.8941605839416058,
309
+ "eval_recall": 0.8974358974358975,
310
+ "eval_roc_auc": 0.9563906600943638,
311
+ "eval_runtime": 2.7645,
312
+ "eval_samples_per_second": 274.553,
313
+ "eval_steps_per_second": 8.682,
314
+ "step": 908
315
+ },
316
+ {
317
+ "epoch": 4.07488986784141,
318
+ "grad_norm": 13.50270938873291,
319
+ "learning_rate": 4.129158512720156e-06,
320
+ "loss": 0.19201255798339845,
321
+ "step": 925
322
+ },
323
+ {
324
+ "epoch": 4.185022026431718,
325
+ "grad_norm": 9.977457046508789,
326
+ "learning_rate": 3.639921722113503e-06,
327
+ "loss": 0.19937816619873047,
328
+ "step": 950
329
+ },
330
+ {
331
+ "epoch": 4.295154185022026,
332
+ "grad_norm": 0.21389485895633698,
333
+ "learning_rate": 3.1506849315068495e-06,
334
+ "loss": 0.13615628242492675,
335
+ "step": 975
336
+ },
337
+ {
338
+ "epoch": 4.405286343612334,
339
+ "grad_norm": 20.24959373474121,
340
+ "learning_rate": 2.661448140900196e-06,
341
+ "loss": 0.16153732299804688,
342
+ "step": 1000
343
+ },
344
+ {
345
+ "epoch": 4.515418502202643,
346
+ "grad_norm": 0.8777796030044556,
347
+ "learning_rate": 2.1722113502935423e-06,
348
+ "loss": 0.2165633773803711,
349
+ "step": 1025
350
+ },
351
+ {
352
+ "epoch": 4.6255506607929515,
353
+ "grad_norm": 7.8306427001953125,
354
+ "learning_rate": 1.6829745596868884e-06,
355
+ "loss": 0.1584473419189453,
356
+ "step": 1050
357
+ },
358
+ {
359
+ "epoch": 4.73568281938326,
360
+ "grad_norm": 13.857385635375977,
361
+ "learning_rate": 1.1937377690802348e-06,
362
+ "loss": 0.14604366302490235,
363
+ "step": 1075
364
+ },
365
+ {
366
+ "epoch": 4.845814977973569,
367
+ "grad_norm": 4.830343723297119,
368
+ "learning_rate": 7.045009784735812e-07,
369
+ "loss": 0.16852569580078125,
370
+ "step": 1100
371
+ },
372
+ {
373
+ "epoch": 4.955947136563877,
374
+ "grad_norm": 9.196575164794922,
375
+ "learning_rate": 2.152641878669276e-07,
376
+ "loss": 0.1510540199279785,
377
+ "step": 1125
378
+ },
379
+ {
380
+ "epoch": 5.0,
381
+ "eval_accuracy": 0.9183135704874835,
382
+ "eval_f1": 0.8908450704225352,
383
+ "eval_loss": 0.34406429529190063,
384
+ "eval_precision": 0.8576271186440678,
385
+ "eval_recall": 0.9267399267399268,
386
+ "eval_roc_auc": 0.9593037278222463,
387
+ "eval_runtime": 2.7389,
388
+ "eval_samples_per_second": 277.123,
389
+ "eval_steps_per_second": 8.763,
390
+ "step": 1135
391
+ }
392
+ ],
393
+ "logging_steps": 25,
394
+ "max_steps": 1135,
395
+ "num_input_tokens_seen": 0,
396
+ "num_train_epochs": 5,
397
+ "save_steps": 500,
398
+ "stateful_callbacks": {
399
+ "EarlyStoppingCallback": {
400
+ "args": {
401
+ "early_stopping_patience": 2,
402
+ "early_stopping_threshold": 0.0
403
+ },
404
+ "attributes": {
405
+ "early_stopping_patience_counter": 1
406
+ }
407
+ },
408
+ "TrainerControl": {
409
+ "args": {
410
+ "should_epoch_stop": false,
411
+ "should_evaluate": false,
412
+ "should_log": false,
413
+ "should_save": true,
414
+ "should_training_stop": true
415
+ },
416
+ "attributes": {}
417
+ }
418
+ },
419
+ "total_flos": 2382470606284800.0,
420
+ "train_batch_size": 16,
421
+ "trial_name": null,
422
+ "trial_params": null
423
+ }
transformer/checkpoint-1135/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ad6f33c2b51fa383f7ec28e16f5b3fe2bf41c0a6a27a9c136fc4163f2fd2af84
3
+ size 5265
transformer/checkpoint-227/config.json CHANGED
@@ -1,37 +1,39 @@
1
  {
2
- "activation": "gelu",
3
  "architectures": [
4
- "DistilBertForSequenceClassification"
5
  ],
6
- "attention_dropout": 0.1,
7
- "bos_token_id": null,
8
- "dim": 768,
9
- "dropout": 0.1,
10
  "dtype": "float32",
11
- "eos_token_id": null,
12
- "hidden_dim": 3072,
 
 
13
  "id2label": {
14
  "0": "NOT_RELEVANT",
15
  "1": "RELEVANT"
16
  },
17
  "initializer_range": 0.02,
 
 
18
  "label2id": {
19
  "NOT_RELEVANT": 0,
20
  "RELEVANT": 1
21
  },
22
- "max_position_embeddings": 512,
23
- "model_type": "distilbert",
24
- "n_heads": 12,
25
- "n_layers": 6,
 
26
  "output_past": true,
27
- "pad_token_id": 0,
 
28
  "problem_type": "single_label_classification",
29
- "qa_dropout": 0.1,
30
- "seq_classif_dropout": 0.2,
31
- "sinusoidal_pos_embds": false,
32
- "tie_weights_": true,
33
  "tie_word_embeddings": true,
34
- "transformers_version": "5.8.1",
 
35
  "use_cache": false,
36
- "vocab_size": 119547
37
  }
 
1
  {
2
+ "add_cross_attention": false,
3
  "architectures": [
4
+ "XLMRobertaForSequenceClassification"
5
  ],
6
+ "attention_probs_dropout_prob": 0.1,
7
+ "bos_token_id": 0,
8
+ "classifier_dropout": null,
 
9
  "dtype": "float32",
10
+ "eos_token_id": 2,
11
+ "hidden_act": "gelu",
12
+ "hidden_dropout_prob": 0.1,
13
+ "hidden_size": 768,
14
  "id2label": {
15
  "0": "NOT_RELEVANT",
16
  "1": "RELEVANT"
17
  },
18
  "initializer_range": 0.02,
19
+ "intermediate_size": 3072,
20
+ "is_decoder": false,
21
  "label2id": {
22
  "NOT_RELEVANT": 0,
23
  "RELEVANT": 1
24
  },
25
+ "layer_norm_eps": 1e-05,
26
+ "max_position_embeddings": 514,
27
+ "model_type": "xlm-roberta",
28
+ "num_attention_heads": 12,
29
+ "num_hidden_layers": 12,
30
  "output_past": true,
31
+ "pad_token_id": 1,
32
+ "position_embedding_type": "absolute",
33
  "problem_type": "single_label_classification",
 
 
 
 
34
  "tie_word_embeddings": true,
35
+ "transformers_version": "5.9.0",
36
+ "type_vocab_size": 1,
37
  "use_cache": false,
38
+ "vocab_size": 250002
39
  }
transformer/checkpoint-227/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4cede1e3c0d6f010b726da8422a7c555e08c61be7af5f00f8477297b28a7b708
3
- size 541317368
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7ae8505bd923cfe335983cca3cba49d4461527d3bb96fc02892a33697682353e
3
+ size 1112205008
transformer/checkpoint-227/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cf5feec4824c7c215d1b0de421e30f0d38d5700d2e1efc79099d80e32912708b
3
- size 1082698827
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:db999a375a165b45e9dfd99aae3b7375b3b92a4981dbfefc3d6ac2fac61108d4
3
+ size 2224532875
transformer/checkpoint-227/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5dd47edc976f1f1e4a848e1daffaaa533c0664f4d98fe31814a2b3deb4e4cd9c
3
  size 14645
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3f255e472d4a5579c702ed63db6978307a2abcb0abd903d3ee59892545951936
3
  size 14645
transformer/checkpoint-227/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bac907eb00ed55aa1cc8c3b58662d1f1268807a09194ba0d2dcb1faee5f1ac0b
3
  size 1383
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:89f7ad9587099a5114861231d169a8ed0d5c9a53a0de53f61a918378c928e007
3
  size 1383
transformer/checkpoint-227/tokenizer.json CHANGED
The diff for this file is too large to render. See raw diff
 
transformer/checkpoint-227/tokenizer_config.json CHANGED
@@ -1,15 +1,15 @@
1
  {
 
2
  "backend": "tokenizers",
3
- "cls_token": "[CLS]",
4
- "do_lower_case": false,
 
5
  "is_local": false,
6
  "local_files_only": false,
7
- "mask_token": "[MASK]",
8
  "model_max_length": 512,
9
- "pad_token": "[PAD]",
10
- "sep_token": "[SEP]",
11
- "strip_accents": null,
12
- "tokenize_chinese_chars": true,
13
- "tokenizer_class": "BertTokenizer",
14
- "unk_token": "[UNK]"
15
  }
 
1
  {
2
+ "add_prefix_space": true,
3
  "backend": "tokenizers",
4
+ "bos_token": "<s>",
5
+ "cls_token": "<s>",
6
+ "eos_token": "</s>",
7
  "is_local": false,
8
  "local_files_only": false,
9
+ "mask_token": "<mask>",
10
  "model_max_length": 512,
11
+ "pad_token": "<pad>",
12
+ "sep_token": "</s>",
13
+ "tokenizer_class": "XLMRobertaTokenizer",
14
+ "unk_token": "<unk>"
 
 
15
  }
transformer/checkpoint-227/trainer_state.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
  "best_global_step": 227,
3
- "best_metric": 0.8527397260273972,
4
  "best_model_checkpoint": "/content/agri-wheat-classifier/transformer/checkpoint-227",
5
  "epoch": 1.0,
6
  "eval_steps": 500,
@@ -11,78 +11,78 @@
11
  "log_history": [
12
  {
13
  "epoch": 0.11013215859030837,
14
- "grad_norm": 1.3902881145477295,
15
  "learning_rate": 4.247787610619469e-06,
16
- "loss": 0.6896240234375,
17
  "step": 25
18
  },
19
  {
20
  "epoch": 0.22026431718061673,
21
- "grad_norm": 1.9834767580032349,
22
  "learning_rate": 8.672566371681418e-06,
23
- "loss": 0.6819134521484375,
24
  "step": 50
25
  },
26
  {
27
  "epoch": 0.3303964757709251,
28
- "grad_norm": 1.3664361238479614,
29
  "learning_rate": 1.3097345132743363e-05,
30
- "loss": 0.67228271484375,
31
  "step": 75
32
  },
33
  {
34
  "epoch": 0.44052863436123346,
35
- "grad_norm": 3.0097296237945557,
36
  "learning_rate": 1.7522123893805313e-05,
37
- "loss": 0.6490057373046875,
38
  "step": 100
39
  },
40
  {
41
  "epoch": 0.5506607929515418,
42
- "grad_norm": 4.345597267150879,
43
  "learning_rate": 1.9784735812133072e-05,
44
- "loss": 0.5840890502929688,
45
  "step": 125
46
  },
47
  {
48
  "epoch": 0.6607929515418502,
49
- "grad_norm": 6.036681175231934,
50
  "learning_rate": 1.929549902152642e-05,
51
- "loss": 0.4646404266357422,
52
  "step": 150
53
  },
54
  {
55
  "epoch": 0.7709251101321586,
56
- "grad_norm": 17.463848114013672,
57
  "learning_rate": 1.8806262230919768e-05,
58
- "loss": 0.4186407089233398,
59
  "step": 175
60
  },
61
  {
62
  "epoch": 0.8810572687224669,
63
- "grad_norm": 4.881515026092529,
64
  "learning_rate": 1.8317025440313113e-05,
65
- "loss": 0.408887939453125,
66
  "step": 200
67
  },
68
  {
69
  "epoch": 0.9911894273127754,
70
- "grad_norm": 5.256113529205322,
71
  "learning_rate": 1.7827788649706457e-05,
72
- "loss": 0.3965899658203125,
73
  "step": 225
74
  },
75
  {
76
  "epoch": 1.0,
77
- "eval_accuracy": 0.8866930171277997,
78
- "eval_f1": 0.8527397260273972,
79
- "eval_loss": 0.29110145568847656,
80
- "eval_precision": 0.8006430868167203,
81
- "eval_recall": 0.9120879120879121,
82
- "eval_roc_auc": 0.9464116130782798,
83
- "eval_runtime": 1.638,
84
- "eval_samples_per_second": 463.383,
85
- "eval_steps_per_second": 14.652,
86
  "step": 227
87
  }
88
  ],
@@ -112,7 +112,7 @@
112
  "attributes": {}
113
  }
114
  },
115
- "total_flos": 239898458966016.0,
116
  "train_batch_size": 16,
117
  "trial_name": null,
118
  "trial_params": null
 
1
  {
2
  "best_global_step": 227,
3
+ "best_metric": 0.8434622467771639,
4
  "best_model_checkpoint": "/content/agri-wheat-classifier/transformer/checkpoint-227",
5
  "epoch": 1.0,
6
  "eval_steps": 500,
 
11
  "log_history": [
12
  {
13
  "epoch": 0.11013215859030837,
14
+ "grad_norm": 3.4615602493286133,
15
  "learning_rate": 4.247787610619469e-06,
16
+ "loss": 0.7144061279296875,
17
  "step": 25
18
  },
19
  {
20
  "epoch": 0.22026431718061673,
21
+ "grad_norm": 6.50156307220459,
22
  "learning_rate": 8.672566371681418e-06,
23
+ "loss": 0.6882943725585937,
24
  "step": 50
25
  },
26
  {
27
  "epoch": 0.3303964757709251,
28
+ "grad_norm": 4.099428176879883,
29
  "learning_rate": 1.3097345132743363e-05,
30
+ "loss": 0.6711004638671875,
31
  "step": 75
32
  },
33
  {
34
  "epoch": 0.44052863436123346,
35
+ "grad_norm": 7.663174629211426,
36
  "learning_rate": 1.7522123893805313e-05,
37
+ "loss": 0.681163330078125,
38
  "step": 100
39
  },
40
  {
41
  "epoch": 0.5506607929515418,
42
+ "grad_norm": 10.003649711608887,
43
  "learning_rate": 1.9784735812133072e-05,
44
+ "loss": 0.672431640625,
45
  "step": 125
46
  },
47
  {
48
  "epoch": 0.6607929515418502,
49
+ "grad_norm": 4.136000633239746,
50
  "learning_rate": 1.929549902152642e-05,
51
+ "loss": 0.6405279541015625,
52
  "step": 150
53
  },
54
  {
55
  "epoch": 0.7709251101321586,
56
+ "grad_norm": 8.746106147766113,
57
  "learning_rate": 1.8806262230919768e-05,
58
+ "loss": 0.6329603576660157,
59
  "step": 175
60
  },
61
  {
62
  "epoch": 0.8810572687224669,
63
+ "grad_norm": 6.302291393280029,
64
  "learning_rate": 1.8317025440313113e-05,
65
+ "loss": 0.48342845916748045,
66
  "step": 200
67
  },
68
  {
69
  "epoch": 0.9911894273127754,
70
+ "grad_norm": 9.206014633178711,
71
  "learning_rate": 1.7827788649706457e-05,
72
+ "loss": 0.498061408996582,
73
  "step": 225
74
  },
75
  {
76
  "epoch": 1.0,
77
+ "eval_accuracy": 0.8880105401844532,
78
+ "eval_f1": 0.8434622467771639,
79
+ "eval_loss": 0.3247315287590027,
80
+ "eval_precision": 0.8481481481481481,
81
+ "eval_recall": 0.8388278388278388,
82
+ "eval_roc_auc": 0.9395378284267173,
83
+ "eval_runtime": 2.8582,
84
+ "eval_samples_per_second": 265.551,
85
+ "eval_steps_per_second": 8.397,
86
  "step": 227
87
  }
88
  ],
 
112
  "attributes": {}
113
  }
114
  },
115
+ "total_flos": 476494121256960.0,
116
  "train_batch_size": 16,
117
  "trial_name": null,
118
  "trial_params": null
transformer/checkpoint-454/config.json CHANGED
@@ -1,37 +1,39 @@
1
  {
2
- "activation": "gelu",
3
  "architectures": [
4
- "DistilBertForSequenceClassification"
5
  ],
6
- "attention_dropout": 0.1,
7
- "bos_token_id": null,
8
- "dim": 768,
9
- "dropout": 0.1,
10
  "dtype": "float32",
11
- "eos_token_id": null,
12
- "hidden_dim": 3072,
 
 
13
  "id2label": {
14
  "0": "NOT_RELEVANT",
15
  "1": "RELEVANT"
16
  },
17
  "initializer_range": 0.02,
 
 
18
  "label2id": {
19
  "NOT_RELEVANT": 0,
20
  "RELEVANT": 1
21
  },
22
- "max_position_embeddings": 512,
23
- "model_type": "distilbert",
24
- "n_heads": 12,
25
- "n_layers": 6,
 
26
  "output_past": true,
27
- "pad_token_id": 0,
 
28
  "problem_type": "single_label_classification",
29
- "qa_dropout": 0.1,
30
- "seq_classif_dropout": 0.2,
31
- "sinusoidal_pos_embds": false,
32
- "tie_weights_": true,
33
  "tie_word_embeddings": true,
34
- "transformers_version": "5.8.1",
 
35
  "use_cache": false,
36
- "vocab_size": 119547
37
  }
 
1
  {
2
+ "add_cross_attention": false,
3
  "architectures": [
4
+ "XLMRobertaForSequenceClassification"
5
  ],
6
+ "attention_probs_dropout_prob": 0.1,
7
+ "bos_token_id": 0,
8
+ "classifier_dropout": null,
 
9
  "dtype": "float32",
10
+ "eos_token_id": 2,
11
+ "hidden_act": "gelu",
12
+ "hidden_dropout_prob": 0.1,
13
+ "hidden_size": 768,
14
  "id2label": {
15
  "0": "NOT_RELEVANT",
16
  "1": "RELEVANT"
17
  },
18
  "initializer_range": 0.02,
19
+ "intermediate_size": 3072,
20
+ "is_decoder": false,
21
  "label2id": {
22
  "NOT_RELEVANT": 0,
23
  "RELEVANT": 1
24
  },
25
+ "layer_norm_eps": 1e-05,
26
+ "max_position_embeddings": 514,
27
+ "model_type": "xlm-roberta",
28
+ "num_attention_heads": 12,
29
+ "num_hidden_layers": 12,
30
  "output_past": true,
31
+ "pad_token_id": 1,
32
+ "position_embedding_type": "absolute",
33
  "problem_type": "single_label_classification",
 
 
 
 
34
  "tie_word_embeddings": true,
35
+ "transformers_version": "5.9.0",
36
+ "type_vocab_size": 1,
37
  "use_cache": false,
38
+ "vocab_size": 250002
39
  }
transformer/checkpoint-454/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c813472ffef86e6412f8bc455be06e22e7b3be9e5979717cf0f10884503488bf
3
- size 541317368
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:97d244fdb253808bc25a95339c9095f5dc92925684865b678269c67b17b933be
3
+ size 1112205008
transformer/checkpoint-454/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:931fbcf19ecce6eeb7f41c3218585f141d0c43a579e1061b55d655329bb3de67
3
- size 1082698827
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e0df3d9070e295427dc0c988c887e84b552089876eef56ca06d99d9a8875d76a
3
+ size 2224532875
transformer/checkpoint-454/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1921ad234fe3b91fe6a72596cdd2262832556d8d9ae96f192b266b92568ee3cd
3
  size 14645
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3a84aa11b3c29b368f323fea589aa91007265db407738674e6271fe471994e1a
3
  size 14645
transformer/checkpoint-454/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ac5aed5d012227ef149cfdb943aaec829a5038bc900759eb1618d705466b0691
3
  size 1383
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e7207a60515ba26ba02a6e3b66fb20ca760789c077bc0a7b95de2554829d36af
3
  size 1383
transformer/checkpoint-454/tokenizer.json CHANGED
The diff for this file is too large to render. See raw diff
 
transformer/checkpoint-454/tokenizer_config.json CHANGED
@@ -1,15 +1,15 @@
1
  {
 
2
  "backend": "tokenizers",
3
- "cls_token": "[CLS]",
4
- "do_lower_case": false,
 
5
  "is_local": false,
6
  "local_files_only": false,
7
- "mask_token": "[MASK]",
8
  "model_max_length": 512,
9
- "pad_token": "[PAD]",
10
- "sep_token": "[SEP]",
11
- "strip_accents": null,
12
- "tokenize_chinese_chars": true,
13
- "tokenizer_class": "BertTokenizer",
14
- "unk_token": "[UNK]"
15
  }
 
1
  {
2
+ "add_prefix_space": true,
3
  "backend": "tokenizers",
4
+ "bos_token": "<s>",
5
+ "cls_token": "<s>",
6
+ "eos_token": "</s>",
7
  "is_local": false,
8
  "local_files_only": false,
9
+ "mask_token": "<mask>",
10
  "model_max_length": 512,
11
+ "pad_token": "<pad>",
12
+ "sep_token": "</s>",
13
+ "tokenizer_class": "XLMRobertaTokenizer",
14
+ "unk_token": "<unk>"
 
 
15
  }
transformer/checkpoint-454/trainer_state.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
  "best_global_step": 454,
3
- "best_metric": 0.8765652951699463,
4
  "best_model_checkpoint": "/content/agri-wheat-classifier/transformer/checkpoint-454",
5
  "epoch": 2.0,
6
  "eval_steps": 500,
@@ -11,154 +11,154 @@
11
  "log_history": [
12
  {
13
  "epoch": 0.11013215859030837,
14
- "grad_norm": 1.3902881145477295,
15
  "learning_rate": 4.247787610619469e-06,
16
- "loss": 0.6896240234375,
17
  "step": 25
18
  },
19
  {
20
  "epoch": 0.22026431718061673,
21
- "grad_norm": 1.9834767580032349,
22
  "learning_rate": 8.672566371681418e-06,
23
- "loss": 0.6819134521484375,
24
  "step": 50
25
  },
26
  {
27
  "epoch": 0.3303964757709251,
28
- "grad_norm": 1.3664361238479614,
29
  "learning_rate": 1.3097345132743363e-05,
30
- "loss": 0.67228271484375,
31
  "step": 75
32
  },
33
  {
34
  "epoch": 0.44052863436123346,
35
- "grad_norm": 3.0097296237945557,
36
  "learning_rate": 1.7522123893805313e-05,
37
- "loss": 0.6490057373046875,
38
  "step": 100
39
  },
40
  {
41
  "epoch": 0.5506607929515418,
42
- "grad_norm": 4.345597267150879,
43
  "learning_rate": 1.9784735812133072e-05,
44
- "loss": 0.5840890502929688,
45
  "step": 125
46
  },
47
  {
48
  "epoch": 0.6607929515418502,
49
- "grad_norm": 6.036681175231934,
50
  "learning_rate": 1.929549902152642e-05,
51
- "loss": 0.4646404266357422,
52
  "step": 150
53
  },
54
  {
55
  "epoch": 0.7709251101321586,
56
- "grad_norm": 17.463848114013672,
57
  "learning_rate": 1.8806262230919768e-05,
58
- "loss": 0.4186407089233398,
59
  "step": 175
60
  },
61
  {
62
  "epoch": 0.8810572687224669,
63
- "grad_norm": 4.881515026092529,
64
  "learning_rate": 1.8317025440313113e-05,
65
- "loss": 0.408887939453125,
66
  "step": 200
67
  },
68
  {
69
  "epoch": 0.9911894273127754,
70
- "grad_norm": 5.256113529205322,
71
  "learning_rate": 1.7827788649706457e-05,
72
- "loss": 0.3965899658203125,
73
  "step": 225
74
  },
75
  {
76
  "epoch": 1.0,
77
- "eval_accuracy": 0.8866930171277997,
78
- "eval_f1": 0.8527397260273972,
79
- "eval_loss": 0.29110145568847656,
80
- "eval_precision": 0.8006430868167203,
81
- "eval_recall": 0.9120879120879121,
82
- "eval_roc_auc": 0.9464116130782798,
83
- "eval_runtime": 1.638,
84
- "eval_samples_per_second": 463.383,
85
- "eval_steps_per_second": 14.652,
86
  "step": 227
87
  },
88
  {
89
  "epoch": 1.1013215859030836,
90
- "grad_norm": 8.05300521850586,
91
  "learning_rate": 1.7338551859099805e-05,
92
- "loss": 0.2900468635559082,
93
  "step": 250
94
  },
95
  {
96
  "epoch": 1.2114537444933922,
97
- "grad_norm": 3.648190975189209,
98
  "learning_rate": 1.6849315068493153e-05,
99
- "loss": 0.3152153396606445,
100
  "step": 275
101
  },
102
  {
103
  "epoch": 1.3215859030837005,
104
- "grad_norm": 13.387518882751465,
105
  "learning_rate": 1.6360078277886498e-05,
106
- "loss": 0.2970449638366699,
107
  "step": 300
108
  },
109
  {
110
  "epoch": 1.4317180616740088,
111
- "grad_norm": 2.984701633453369,
112
  "learning_rate": 1.5870841487279843e-05,
113
- "loss": 0.2916489791870117,
114
  "step": 325
115
  },
116
  {
117
  "epoch": 1.5418502202643172,
118
- "grad_norm": 4.417141914367676,
119
  "learning_rate": 1.538160469667319e-05,
120
- "loss": 0.2869120979309082,
121
  "step": 350
122
  },
123
  {
124
  "epoch": 1.6519823788546255,
125
- "grad_norm": 3.330986499786377,
126
  "learning_rate": 1.4892367906066539e-05,
127
- "loss": 0.3101354217529297,
128
  "step": 375
129
  },
130
  {
131
  "epoch": 1.7621145374449338,
132
- "grad_norm": 5.084468364715576,
133
  "learning_rate": 1.4403131115459884e-05,
134
- "loss": 0.30187932968139647,
135
  "step": 400
136
  },
137
  {
138
  "epoch": 1.8722466960352424,
139
- "grad_norm": 6.020040512084961,
140
  "learning_rate": 1.391389432485323e-05,
141
- "loss": 0.30431194305419923,
142
  "step": 425
143
  },
144
  {
145
  "epoch": 1.9823788546255505,
146
- "grad_norm": 3.684113025665283,
147
  "learning_rate": 1.3424657534246576e-05,
148
- "loss": 0.2970767021179199,
149
  "step": 450
150
  },
151
  {
152
  "epoch": 2.0,
153
- "eval_accuracy": 0.9090909090909091,
154
- "eval_f1": 0.8765652951699463,
155
- "eval_loss": 0.255423367023468,
156
- "eval_precision": 0.8566433566433567,
157
- "eval_recall": 0.8974358974358975,
158
- "eval_roc_auc": 0.9556633352929649,
159
- "eval_runtime": 1.716,
160
- "eval_samples_per_second": 442.313,
161
- "eval_steps_per_second": 13.986,
162
  "step": 454
163
  }
164
  ],
@@ -188,7 +188,7 @@
188
  "attributes": {}
189
  }
190
  },
191
- "total_flos": 479796917932032.0,
192
  "train_batch_size": 16,
193
  "trial_name": null,
194
  "trial_params": null
 
1
  {
2
  "best_global_step": 454,
3
+ "best_metric": 0.8571428571428571,
4
  "best_model_checkpoint": "/content/agri-wheat-classifier/transformer/checkpoint-454",
5
  "epoch": 2.0,
6
  "eval_steps": 500,
 
11
  "log_history": [
12
  {
13
  "epoch": 0.11013215859030837,
14
+ "grad_norm": 3.4615602493286133,
15
  "learning_rate": 4.247787610619469e-06,
16
+ "loss": 0.7144061279296875,
17
  "step": 25
18
  },
19
  {
20
  "epoch": 0.22026431718061673,
21
+ "grad_norm": 6.50156307220459,
22
  "learning_rate": 8.672566371681418e-06,
23
+ "loss": 0.6882943725585937,
24
  "step": 50
25
  },
26
  {
27
  "epoch": 0.3303964757709251,
28
+ "grad_norm": 4.099428176879883,
29
  "learning_rate": 1.3097345132743363e-05,
30
+ "loss": 0.6711004638671875,
31
  "step": 75
32
  },
33
  {
34
  "epoch": 0.44052863436123346,
35
+ "grad_norm": 7.663174629211426,
36
  "learning_rate": 1.7522123893805313e-05,
37
+ "loss": 0.681163330078125,
38
  "step": 100
39
  },
40
  {
41
  "epoch": 0.5506607929515418,
42
+ "grad_norm": 10.003649711608887,
43
  "learning_rate": 1.9784735812133072e-05,
44
+ "loss": 0.672431640625,
45
  "step": 125
46
  },
47
  {
48
  "epoch": 0.6607929515418502,
49
+ "grad_norm": 4.136000633239746,
50
  "learning_rate": 1.929549902152642e-05,
51
+ "loss": 0.6405279541015625,
52
  "step": 150
53
  },
54
  {
55
  "epoch": 0.7709251101321586,
56
+ "grad_norm": 8.746106147766113,
57
  "learning_rate": 1.8806262230919768e-05,
58
+ "loss": 0.6329603576660157,
59
  "step": 175
60
  },
61
  {
62
  "epoch": 0.8810572687224669,
63
+ "grad_norm": 6.302291393280029,
64
  "learning_rate": 1.8317025440313113e-05,
65
+ "loss": 0.48342845916748045,
66
  "step": 200
67
  },
68
  {
69
  "epoch": 0.9911894273127754,
70
+ "grad_norm": 9.206014633178711,
71
  "learning_rate": 1.7827788649706457e-05,
72
+ "loss": 0.498061408996582,
73
  "step": 225
74
  },
75
  {
76
  "epoch": 1.0,
77
+ "eval_accuracy": 0.8880105401844532,
78
+ "eval_f1": 0.8434622467771639,
79
+ "eval_loss": 0.3247315287590027,
80
+ "eval_precision": 0.8481481481481481,
81
+ "eval_recall": 0.8388278388278388,
82
+ "eval_roc_auc": 0.9395378284267173,
83
+ "eval_runtime": 2.8582,
84
+ "eval_samples_per_second": 265.551,
85
+ "eval_steps_per_second": 8.397,
86
  "step": 227
87
  },
88
  {
89
  "epoch": 1.1013215859030836,
90
+ "grad_norm": 36.54235076904297,
91
  "learning_rate": 1.7338551859099805e-05,
92
+ "loss": 0.34838932037353515,
93
  "step": 250
94
  },
95
  {
96
  "epoch": 1.2114537444933922,
97
+ "grad_norm": 11.171507835388184,
98
  "learning_rate": 1.6849315068493153e-05,
99
+ "loss": 0.4250565719604492,
100
  "step": 275
101
  },
102
  {
103
  "epoch": 1.3215859030837005,
104
+ "grad_norm": 18.83371353149414,
105
  "learning_rate": 1.6360078277886498e-05,
106
+ "loss": 0.3523222351074219,
107
  "step": 300
108
  },
109
  {
110
  "epoch": 1.4317180616740088,
111
+ "grad_norm": 12.578425407409668,
112
  "learning_rate": 1.5870841487279843e-05,
113
+ "loss": 0.3326843643188477,
114
  "step": 325
115
  },
116
  {
117
  "epoch": 1.5418502202643172,
118
+ "grad_norm": 4.249295711517334,
119
  "learning_rate": 1.538160469667319e-05,
120
+ "loss": 0.4222240447998047,
121
  "step": 350
122
  },
123
  {
124
  "epoch": 1.6519823788546255,
125
+ "grad_norm": 12.961797714233398,
126
  "learning_rate": 1.4892367906066539e-05,
127
+ "loss": 0.3197017669677734,
128
  "step": 375
129
  },
130
  {
131
  "epoch": 1.7621145374449338,
132
+ "grad_norm": 15.027281761169434,
133
  "learning_rate": 1.4403131115459884e-05,
134
+ "loss": 0.3585982894897461,
135
  "step": 400
136
  },
137
  {
138
  "epoch": 1.8722466960352424,
139
+ "grad_norm": 5.844873428344727,
140
  "learning_rate": 1.391389432485323e-05,
141
+ "loss": 0.3265089416503906,
142
  "step": 425
143
  },
144
  {
145
  "epoch": 1.9823788546255505,
146
+ "grad_norm": 17.369247436523438,
147
  "learning_rate": 1.3424657534246576e-05,
148
+ "loss": 0.30590789794921874,
149
  "step": 450
150
  },
151
  {
152
  "epoch": 2.0,
153
+ "eval_accuracy": 0.8880105401844532,
154
+ "eval_f1": 0.8571428571428571,
155
+ "eval_loss": 0.2787843644618988,
156
+ "eval_precision": 0.7919254658385093,
157
+ "eval_recall": 0.9340659340659341,
158
+ "eval_roc_auc": 0.959367792701126,
159
+ "eval_runtime": 2.7537,
160
+ "eval_samples_per_second": 275.624,
161
+ "eval_steps_per_second": 8.715,
162
  "step": 454
163
  }
164
  ],
 
188
  "attributes": {}
189
  }
190
  },
191
+ "total_flos": 952988242513920.0,
192
  "train_batch_size": 16,
193
  "trial_name": null,
194
  "trial_params": null
transformer/checkpoint-681/config.json CHANGED
@@ -1,37 +1,39 @@
1
  {
2
- "activation": "gelu",
3
  "architectures": [
4
- "DistilBertForSequenceClassification"
5
  ],
6
- "attention_dropout": 0.1,
7
- "bos_token_id": null,
8
- "dim": 768,
9
- "dropout": 0.1,
10
  "dtype": "float32",
11
- "eos_token_id": null,
12
- "hidden_dim": 3072,
 
 
13
  "id2label": {
14
  "0": "NOT_RELEVANT",
15
  "1": "RELEVANT"
16
  },
17
  "initializer_range": 0.02,
 
 
18
  "label2id": {
19
  "NOT_RELEVANT": 0,
20
  "RELEVANT": 1
21
  },
22
- "max_position_embeddings": 512,
23
- "model_type": "distilbert",
24
- "n_heads": 12,
25
- "n_layers": 6,
 
26
  "output_past": true,
27
- "pad_token_id": 0,
 
28
  "problem_type": "single_label_classification",
29
- "qa_dropout": 0.1,
30
- "seq_classif_dropout": 0.2,
31
- "sinusoidal_pos_embds": false,
32
- "tie_weights_": true,
33
  "tie_word_embeddings": true,
34
- "transformers_version": "5.8.1",
 
35
  "use_cache": false,
36
- "vocab_size": 119547
37
  }
 
1
  {
2
+ "add_cross_attention": false,
3
  "architectures": [
4
+ "XLMRobertaForSequenceClassification"
5
  ],
6
+ "attention_probs_dropout_prob": 0.1,
7
+ "bos_token_id": 0,
8
+ "classifier_dropout": null,
 
9
  "dtype": "float32",
10
+ "eos_token_id": 2,
11
+ "hidden_act": "gelu",
12
+ "hidden_dropout_prob": 0.1,
13
+ "hidden_size": 768,
14
  "id2label": {
15
  "0": "NOT_RELEVANT",
16
  "1": "RELEVANT"
17
  },
18
  "initializer_range": 0.02,
19
+ "intermediate_size": 3072,
20
+ "is_decoder": false,
21
  "label2id": {
22
  "NOT_RELEVANT": 0,
23
  "RELEVANT": 1
24
  },
25
+ "layer_norm_eps": 1e-05,
26
+ "max_position_embeddings": 514,
27
+ "model_type": "xlm-roberta",
28
+ "num_attention_heads": 12,
29
+ "num_hidden_layers": 12,
30
  "output_past": true,
31
+ "pad_token_id": 1,
32
+ "position_embedding_type": "absolute",
33
  "problem_type": "single_label_classification",
 
 
 
 
34
  "tie_word_embeddings": true,
35
+ "transformers_version": "5.9.0",
36
+ "type_vocab_size": 1,
37
  "use_cache": false,
38
+ "vocab_size": 250002
39
  }
transformer/checkpoint-681/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:03dbde438b6b09e9c569ccf3d52d3026870a20fcbed088f82020a5c58d757b9c
3
- size 541317368
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:99cd43daccc55ad1ce995212b80d2c417997816e8fff5663abf8e22826766c90
3
+ size 1112205008
transformer/checkpoint-681/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:98bc3e69e944d0fdfd57444f651c232fe90a0c01c77fc9485b1e89c9b1da6e44
3
- size 1082698827
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b8ab5e9309c0a4d78dde38e471c4fdefdfb6af708e8fdac20851e3a7ba69f922
3
+ size 2224532875
transformer/checkpoint-681/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e9be3f1c7f8a91518af8a97c2e068378aafd12649996efa700161802b694aaf3
3
  size 14645
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2ef666104b20bd014bb99759c12ba2ab3544d8d6a6c794765b50a125425e2514
3
  size 14645
transformer/checkpoint-681/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:08bb78a60da8a2d1603090739ac594effe08f188234024d60472cddbc5a648e9
3
  size 1383
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7821ac2948192ae5dfdbf609f2c5b345a33913b1a653a84c34869f001e99d2aa
3
  size 1383
transformer/checkpoint-681/tokenizer.json CHANGED
The diff for this file is too large to render. See raw diff
 
transformer/checkpoint-681/tokenizer_config.json CHANGED
@@ -1,15 +1,15 @@
1
  {
 
2
  "backend": "tokenizers",
3
- "cls_token": "[CLS]",
4
- "do_lower_case": false,
 
5
  "is_local": false,
6
  "local_files_only": false,
7
- "mask_token": "[MASK]",
8
  "model_max_length": 512,
9
- "pad_token": "[PAD]",
10
- "sep_token": "[SEP]",
11
- "strip_accents": null,
12
- "tokenize_chinese_chars": true,
13
- "tokenizer_class": "BertTokenizer",
14
- "unk_token": "[UNK]"
15
  }
 
1
  {
2
+ "add_prefix_space": true,
3
  "backend": "tokenizers",
4
+ "bos_token": "<s>",
5
+ "cls_token": "<s>",
6
+ "eos_token": "</s>",
7
  "is_local": false,
8
  "local_files_only": false,
9
+ "mask_token": "<mask>",
10
  "model_max_length": 512,
11
+ "pad_token": "<pad>",
12
+ "sep_token": "</s>",
13
+ "tokenizer_class": "XLMRobertaTokenizer",
14
+ "unk_token": "<unk>"
 
 
15
  }
transformer/checkpoint-681/trainer_state.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
- "best_global_step": 454,
3
- "best_metric": 0.8765652951699463,
4
- "best_model_checkpoint": "/content/agri-wheat-classifier/transformer/checkpoint-454",
5
  "epoch": 3.0,
6
  "eval_steps": 500,
7
  "global_step": 681,
@@ -11,230 +11,230 @@
11
  "log_history": [
12
  {
13
  "epoch": 0.11013215859030837,
14
- "grad_norm": 1.3902881145477295,
15
  "learning_rate": 4.247787610619469e-06,
16
- "loss": 0.6896240234375,
17
  "step": 25
18
  },
19
  {
20
  "epoch": 0.22026431718061673,
21
- "grad_norm": 1.9834767580032349,
22
  "learning_rate": 8.672566371681418e-06,
23
- "loss": 0.6819134521484375,
24
  "step": 50
25
  },
26
  {
27
  "epoch": 0.3303964757709251,
28
- "grad_norm": 1.3664361238479614,
29
  "learning_rate": 1.3097345132743363e-05,
30
- "loss": 0.67228271484375,
31
  "step": 75
32
  },
33
  {
34
  "epoch": 0.44052863436123346,
35
- "grad_norm": 3.0097296237945557,
36
  "learning_rate": 1.7522123893805313e-05,
37
- "loss": 0.6490057373046875,
38
  "step": 100
39
  },
40
  {
41
  "epoch": 0.5506607929515418,
42
- "grad_norm": 4.345597267150879,
43
  "learning_rate": 1.9784735812133072e-05,
44
- "loss": 0.5840890502929688,
45
  "step": 125
46
  },
47
  {
48
  "epoch": 0.6607929515418502,
49
- "grad_norm": 6.036681175231934,
50
  "learning_rate": 1.929549902152642e-05,
51
- "loss": 0.4646404266357422,
52
  "step": 150
53
  },
54
  {
55
  "epoch": 0.7709251101321586,
56
- "grad_norm": 17.463848114013672,
57
  "learning_rate": 1.8806262230919768e-05,
58
- "loss": 0.4186407089233398,
59
  "step": 175
60
  },
61
  {
62
  "epoch": 0.8810572687224669,
63
- "grad_norm": 4.881515026092529,
64
  "learning_rate": 1.8317025440313113e-05,
65
- "loss": 0.408887939453125,
66
  "step": 200
67
  },
68
  {
69
  "epoch": 0.9911894273127754,
70
- "grad_norm": 5.256113529205322,
71
  "learning_rate": 1.7827788649706457e-05,
72
- "loss": 0.3965899658203125,
73
  "step": 225
74
  },
75
  {
76
  "epoch": 1.0,
77
- "eval_accuracy": 0.8866930171277997,
78
- "eval_f1": 0.8527397260273972,
79
- "eval_loss": 0.29110145568847656,
80
- "eval_precision": 0.8006430868167203,
81
- "eval_recall": 0.9120879120879121,
82
- "eval_roc_auc": 0.9464116130782798,
83
- "eval_runtime": 1.638,
84
- "eval_samples_per_second": 463.383,
85
- "eval_steps_per_second": 14.652,
86
  "step": 227
87
  },
88
  {
89
  "epoch": 1.1013215859030836,
90
- "grad_norm": 8.05300521850586,
91
  "learning_rate": 1.7338551859099805e-05,
92
- "loss": 0.2900468635559082,
93
  "step": 250
94
  },
95
  {
96
  "epoch": 1.2114537444933922,
97
- "grad_norm": 3.648190975189209,
98
  "learning_rate": 1.6849315068493153e-05,
99
- "loss": 0.3152153396606445,
100
  "step": 275
101
  },
102
  {
103
  "epoch": 1.3215859030837005,
104
- "grad_norm": 13.387518882751465,
105
  "learning_rate": 1.6360078277886498e-05,
106
- "loss": 0.2970449638366699,
107
  "step": 300
108
  },
109
  {
110
  "epoch": 1.4317180616740088,
111
- "grad_norm": 2.984701633453369,
112
  "learning_rate": 1.5870841487279843e-05,
113
- "loss": 0.2916489791870117,
114
  "step": 325
115
  },
116
  {
117
  "epoch": 1.5418502202643172,
118
- "grad_norm": 4.417141914367676,
119
  "learning_rate": 1.538160469667319e-05,
120
- "loss": 0.2869120979309082,
121
  "step": 350
122
  },
123
  {
124
  "epoch": 1.6519823788546255,
125
- "grad_norm": 3.330986499786377,
126
  "learning_rate": 1.4892367906066539e-05,
127
- "loss": 0.3101354217529297,
128
  "step": 375
129
  },
130
  {
131
  "epoch": 1.7621145374449338,
132
- "grad_norm": 5.084468364715576,
133
  "learning_rate": 1.4403131115459884e-05,
134
- "loss": 0.30187932968139647,
135
  "step": 400
136
  },
137
  {
138
  "epoch": 1.8722466960352424,
139
- "grad_norm": 6.020040512084961,
140
  "learning_rate": 1.391389432485323e-05,
141
- "loss": 0.30431194305419923,
142
  "step": 425
143
  },
144
  {
145
  "epoch": 1.9823788546255505,
146
- "grad_norm": 3.684113025665283,
147
  "learning_rate": 1.3424657534246576e-05,
148
- "loss": 0.2970767021179199,
149
  "step": 450
150
  },
151
  {
152
  "epoch": 2.0,
153
- "eval_accuracy": 0.9090909090909091,
154
- "eval_f1": 0.8765652951699463,
155
- "eval_loss": 0.255423367023468,
156
- "eval_precision": 0.8566433566433567,
157
- "eval_recall": 0.8974358974358975,
158
- "eval_roc_auc": 0.9556633352929649,
159
- "eval_runtime": 1.716,
160
- "eval_samples_per_second": 442.313,
161
- "eval_steps_per_second": 13.986,
162
  "step": 454
163
  },
164
  {
165
  "epoch": 2.092511013215859,
166
- "grad_norm": 5.848704814910889,
167
  "learning_rate": 1.2935420743639924e-05,
168
- "loss": 0.2515826416015625,
169
  "step": 475
170
  },
171
  {
172
  "epoch": 2.202643171806167,
173
- "grad_norm": 6.936087131500244,
174
  "learning_rate": 1.2446183953033269e-05,
175
- "loss": 0.2454897689819336,
176
  "step": 500
177
  },
178
  {
179
  "epoch": 2.3127753303964758,
180
- "grad_norm": 8.904288291931152,
181
  "learning_rate": 1.1956947162426615e-05,
182
- "loss": 0.24430061340332032,
183
  "step": 525
184
  },
185
  {
186
  "epoch": 2.4229074889867843,
187
- "grad_norm": 9.340006828308105,
188
  "learning_rate": 1.1467710371819962e-05,
189
- "loss": 0.25365222930908204,
190
  "step": 550
191
  },
192
  {
193
  "epoch": 2.5330396475770924,
194
- "grad_norm": 10.99807071685791,
195
  "learning_rate": 1.097847358121331e-05,
196
- "loss": 0.20383157730102539,
197
  "step": 575
198
  },
199
  {
200
  "epoch": 2.643171806167401,
201
- "grad_norm": 0.9036199450492859,
202
  "learning_rate": 1.0489236790606654e-05,
203
- "loss": 0.2450111961364746,
204
  "step": 600
205
  },
206
  {
207
  "epoch": 2.753303964757709,
208
- "grad_norm": 11.331809043884277,
209
  "learning_rate": 1e-05,
210
- "loss": 0.2072979736328125,
211
  "step": 625
212
  },
213
  {
214
  "epoch": 2.8634361233480177,
215
- "grad_norm": 11.751481056213379,
216
  "learning_rate": 9.510763209393347e-06,
217
- "loss": 0.19760629653930664,
218
  "step": 650
219
  },
220
  {
221
  "epoch": 2.9735682819383262,
222
- "grad_norm": 10.376523971557617,
223
  "learning_rate": 9.021526418786694e-06,
224
- "loss": 0.17518287658691406,
225
  "step": 675
226
  },
227
  {
228
  "epoch": 3.0,
229
- "eval_accuracy": 0.8774703557312253,
230
- "eval_f1": 0.8426395939086294,
231
- "eval_loss": 0.3243195116519928,
232
- "eval_precision": 0.7830188679245284,
233
- "eval_recall": 0.9120879120879121,
234
- "eval_roc_auc": 0.9531158142269252,
235
- "eval_runtime": 1.6443,
236
- "eval_samples_per_second": 461.6,
237
- "eval_steps_per_second": 14.596,
238
  "step": 681
239
  }
240
  ],
@@ -250,7 +250,7 @@
250
  "early_stopping_threshold": 0.0
251
  },
252
  "attributes": {
253
- "early_stopping_patience_counter": 1
254
  }
255
  },
256
  "TrainerControl": {
@@ -264,7 +264,7 @@
264
  "attributes": {}
265
  }
266
  },
267
- "total_flos": 719695376898048.0,
268
  "train_batch_size": 16,
269
  "trial_name": null,
270
  "trial_params": null
 
1
  {
2
+ "best_global_step": 681,
3
+ "best_metric": 0.8809946714031972,
4
+ "best_model_checkpoint": "/content/agri-wheat-classifier/transformer/checkpoint-681",
5
  "epoch": 3.0,
6
  "eval_steps": 500,
7
  "global_step": 681,
 
11
  "log_history": [
12
  {
13
  "epoch": 0.11013215859030837,
14
+ "grad_norm": 3.4615602493286133,
15
  "learning_rate": 4.247787610619469e-06,
16
+ "loss": 0.7144061279296875,
17
  "step": 25
18
  },
19
  {
20
  "epoch": 0.22026431718061673,
21
+ "grad_norm": 6.50156307220459,
22
  "learning_rate": 8.672566371681418e-06,
23
+ "loss": 0.6882943725585937,
24
  "step": 50
25
  },
26
  {
27
  "epoch": 0.3303964757709251,
28
+ "grad_norm": 4.099428176879883,
29
  "learning_rate": 1.3097345132743363e-05,
30
+ "loss": 0.6711004638671875,
31
  "step": 75
32
  },
33
  {
34
  "epoch": 0.44052863436123346,
35
+ "grad_norm": 7.663174629211426,
36
  "learning_rate": 1.7522123893805313e-05,
37
+ "loss": 0.681163330078125,
38
  "step": 100
39
  },
40
  {
41
  "epoch": 0.5506607929515418,
42
+ "grad_norm": 10.003649711608887,
43
  "learning_rate": 1.9784735812133072e-05,
44
+ "loss": 0.672431640625,
45
  "step": 125
46
  },
47
  {
48
  "epoch": 0.6607929515418502,
49
+ "grad_norm": 4.136000633239746,
50
  "learning_rate": 1.929549902152642e-05,
51
+ "loss": 0.6405279541015625,
52
  "step": 150
53
  },
54
  {
55
  "epoch": 0.7709251101321586,
56
+ "grad_norm": 8.746106147766113,
57
  "learning_rate": 1.8806262230919768e-05,
58
+ "loss": 0.6329603576660157,
59
  "step": 175
60
  },
61
  {
62
  "epoch": 0.8810572687224669,
63
+ "grad_norm": 6.302291393280029,
64
  "learning_rate": 1.8317025440313113e-05,
65
+ "loss": 0.48342845916748045,
66
  "step": 200
67
  },
68
  {
69
  "epoch": 0.9911894273127754,
70
+ "grad_norm": 9.206014633178711,
71
  "learning_rate": 1.7827788649706457e-05,
72
+ "loss": 0.498061408996582,
73
  "step": 225
74
  },
75
  {
76
  "epoch": 1.0,
77
+ "eval_accuracy": 0.8880105401844532,
78
+ "eval_f1": 0.8434622467771639,
79
+ "eval_loss": 0.3247315287590027,
80
+ "eval_precision": 0.8481481481481481,
81
+ "eval_recall": 0.8388278388278388,
82
+ "eval_roc_auc": 0.9395378284267173,
83
+ "eval_runtime": 2.8582,
84
+ "eval_samples_per_second": 265.551,
85
+ "eval_steps_per_second": 8.397,
86
  "step": 227
87
  },
88
  {
89
  "epoch": 1.1013215859030836,
90
+ "grad_norm": 36.54235076904297,
91
  "learning_rate": 1.7338551859099805e-05,
92
+ "loss": 0.34838932037353515,
93
  "step": 250
94
  },
95
  {
96
  "epoch": 1.2114537444933922,
97
+ "grad_norm": 11.171507835388184,
98
  "learning_rate": 1.6849315068493153e-05,
99
+ "loss": 0.4250565719604492,
100
  "step": 275
101
  },
102
  {
103
  "epoch": 1.3215859030837005,
104
+ "grad_norm": 18.83371353149414,
105
  "learning_rate": 1.6360078277886498e-05,
106
+ "loss": 0.3523222351074219,
107
  "step": 300
108
  },
109
  {
110
  "epoch": 1.4317180616740088,
111
+ "grad_norm": 12.578425407409668,
112
  "learning_rate": 1.5870841487279843e-05,
113
+ "loss": 0.3326843643188477,
114
  "step": 325
115
  },
116
  {
117
  "epoch": 1.5418502202643172,
118
+ "grad_norm": 4.249295711517334,
119
  "learning_rate": 1.538160469667319e-05,
120
+ "loss": 0.4222240447998047,
121
  "step": 350
122
  },
123
  {
124
  "epoch": 1.6519823788546255,
125
+ "grad_norm": 12.961797714233398,
126
  "learning_rate": 1.4892367906066539e-05,
127
+ "loss": 0.3197017669677734,
128
  "step": 375
129
  },
130
  {
131
  "epoch": 1.7621145374449338,
132
+ "grad_norm": 15.027281761169434,
133
  "learning_rate": 1.4403131115459884e-05,
134
+ "loss": 0.3585982894897461,
135
  "step": 400
136
  },
137
  {
138
  "epoch": 1.8722466960352424,
139
+ "grad_norm": 5.844873428344727,
140
  "learning_rate": 1.391389432485323e-05,
141
+ "loss": 0.3265089416503906,
142
  "step": 425
143
  },
144
  {
145
  "epoch": 1.9823788546255505,
146
+ "grad_norm": 17.369247436523438,
147
  "learning_rate": 1.3424657534246576e-05,
148
+ "loss": 0.30590789794921874,
149
  "step": 450
150
  },
151
  {
152
  "epoch": 2.0,
153
+ "eval_accuracy": 0.8880105401844532,
154
+ "eval_f1": 0.8571428571428571,
155
+ "eval_loss": 0.2787843644618988,
156
+ "eval_precision": 0.7919254658385093,
157
+ "eval_recall": 0.9340659340659341,
158
+ "eval_roc_auc": 0.959367792701126,
159
+ "eval_runtime": 2.7537,
160
+ "eval_samples_per_second": 275.624,
161
+ "eval_steps_per_second": 8.715,
162
  "step": 454
163
  },
164
  {
165
  "epoch": 2.092511013215859,
166
+ "grad_norm": 10.165802955627441,
167
  "learning_rate": 1.2935420743639924e-05,
168
+ "loss": 0.3099861145019531,
169
  "step": 475
170
  },
171
  {
172
  "epoch": 2.202643171806167,
173
+ "grad_norm": 12.579766273498535,
174
  "learning_rate": 1.2446183953033269e-05,
175
+ "loss": 0.24645326614379884,
176
  "step": 500
177
  },
178
  {
179
  "epoch": 2.3127753303964758,
180
+ "grad_norm": 16.500667572021484,
181
  "learning_rate": 1.1956947162426615e-05,
182
+ "loss": 0.32091796875,
183
  "step": 525
184
  },
185
  {
186
  "epoch": 2.4229074889867843,
187
+ "grad_norm": 48.79829788208008,
188
  "learning_rate": 1.1467710371819962e-05,
189
+ "loss": 0.28792179107666016,
190
  "step": 550
191
  },
192
  {
193
  "epoch": 2.5330396475770924,
194
+ "grad_norm": 3.1696665287017822,
195
  "learning_rate": 1.097847358121331e-05,
196
+ "loss": 0.20424072265625,
197
  "step": 575
198
  },
199
  {
200
  "epoch": 2.643171806167401,
201
+ "grad_norm": 5.296319007873535,
202
  "learning_rate": 1.0489236790606654e-05,
203
+ "loss": 0.3179521179199219,
204
  "step": 600
205
  },
206
  {
207
  "epoch": 2.753303964757709,
208
+ "grad_norm": 2.286626100540161,
209
  "learning_rate": 1e-05,
210
+ "loss": 0.20801780700683595,
211
  "step": 625
212
  },
213
  {
214
  "epoch": 2.8634361233480177,
215
+ "grad_norm": 18.524463653564453,
216
  "learning_rate": 9.510763209393347e-06,
217
+ "loss": 0.2919172477722168,
218
  "step": 650
219
  },
220
  {
221
  "epoch": 2.9735682819383262,
222
+ "grad_norm": 11.000245094299316,
223
  "learning_rate": 9.021526418786694e-06,
224
+ "loss": 0.2509865570068359,
225
  "step": 675
226
  },
227
  {
228
  "epoch": 3.0,
229
+ "eval_accuracy": 0.9117259552042161,
230
+ "eval_f1": 0.8809946714031972,
231
+ "eval_loss": 0.2792136073112488,
232
+ "eval_precision": 0.8551724137931035,
233
+ "eval_recall": 0.9084249084249084,
234
+ "eval_roc_auc": 0.9613387298572483,
235
+ "eval_runtime": 2.7576,
236
+ "eval_samples_per_second": 275.236,
237
+ "eval_steps_per_second": 8.703,
238
  "step": 681
239
  }
240
  ],
 
250
  "early_stopping_threshold": 0.0
251
  },
252
  "attributes": {
253
+ "early_stopping_patience_counter": 0
254
  }
255
  },
256
  "TrainerControl": {
 
264
  "attributes": {}
265
  }
266
  },
267
+ "total_flos": 1429482363770880.0,
268
  "train_batch_size": 16,
269
  "trial_name": null,
270
  "trial_params": null
transformer/checkpoint-908/config.json CHANGED
@@ -1,37 +1,39 @@
1
  {
2
- "activation": "gelu",
3
  "architectures": [
4
- "DistilBertForSequenceClassification"
5
  ],
6
- "attention_dropout": 0.1,
7
- "bos_token_id": null,
8
- "dim": 768,
9
- "dropout": 0.1,
10
  "dtype": "float32",
11
- "eos_token_id": null,
12
- "hidden_dim": 3072,
 
 
13
  "id2label": {
14
  "0": "NOT_RELEVANT",
15
  "1": "RELEVANT"
16
  },
17
  "initializer_range": 0.02,
 
 
18
  "label2id": {
19
  "NOT_RELEVANT": 0,
20
  "RELEVANT": 1
21
  },
22
- "max_position_embeddings": 512,
23
- "model_type": "distilbert",
24
- "n_heads": 12,
25
- "n_layers": 6,
 
26
  "output_past": true,
27
- "pad_token_id": 0,
 
28
  "problem_type": "single_label_classification",
29
- "qa_dropout": 0.1,
30
- "seq_classif_dropout": 0.2,
31
- "sinusoidal_pos_embds": false,
32
- "tie_weights_": true,
33
  "tie_word_embeddings": true,
34
- "transformers_version": "5.8.1",
 
35
  "use_cache": false,
36
- "vocab_size": 119547
37
  }
 
1
  {
2
+ "add_cross_attention": false,
3
  "architectures": [
4
+ "XLMRobertaForSequenceClassification"
5
  ],
6
+ "attention_probs_dropout_prob": 0.1,
7
+ "bos_token_id": 0,
8
+ "classifier_dropout": null,
 
9
  "dtype": "float32",
10
+ "eos_token_id": 2,
11
+ "hidden_act": "gelu",
12
+ "hidden_dropout_prob": 0.1,
13
+ "hidden_size": 768,
14
  "id2label": {
15
  "0": "NOT_RELEVANT",
16
  "1": "RELEVANT"
17
  },
18
  "initializer_range": 0.02,
19
+ "intermediate_size": 3072,
20
+ "is_decoder": false,
21
  "label2id": {
22
  "NOT_RELEVANT": 0,
23
  "RELEVANT": 1
24
  },
25
+ "layer_norm_eps": 1e-05,
26
+ "max_position_embeddings": 514,
27
+ "model_type": "xlm-roberta",
28
+ "num_attention_heads": 12,
29
+ "num_hidden_layers": 12,
30
  "output_past": true,
31
+ "pad_token_id": 1,
32
+ "position_embedding_type": "absolute",
33
  "problem_type": "single_label_classification",
 
 
 
 
34
  "tie_word_embeddings": true,
35
+ "transformers_version": "5.9.0",
36
+ "type_vocab_size": 1,
37
  "use_cache": false,
38
+ "vocab_size": 250002
39
  }
transformer/checkpoint-908/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cda5d1601a665324d26d288eb6116f925bca8729dea7e932524e9bac11b6f5e4
3
- size 541317368
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4e5c530095532c2b7b2533294a4f71b74527ec3d50e89f8da1c71b1e8f7e6e45
3
+ size 1112205008
transformer/checkpoint-908/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0d5fd9ef5917c9829fb094dcd22189d2d57d1ca5b11470f33e337d67b2d1d616
3
- size 1082698827
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ac9ab63cee4c17da8e692806f4f7e83a5a4d99a2383f4d3c0421bd39157d1743
3
+ size 2224532875