lgsilvaesilva committed on
Commit
f2e02af
·
verified ·
1 Parent(s): e92df87

Upload folder using huggingface_hub

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .gitattributes +6 -0
  2. README.md +287 -0
  3. REPORT.md +140 -0
  4. baselines/embedding-lightgbm/embedding-lightgbm.joblib +3 -0
  5. baselines/embedding-lightgbm/test_predictions.csv +0 -0
  6. baselines/embedding-lightgbm/validation_predictions.csv +0 -0
  7. baselines/embedding-logistic/embedding-logistic.joblib +3 -0
  8. baselines/embedding-logistic/test_predictions.csv +0 -0
  9. baselines/embedding-logistic/validation_predictions.csv +0 -0
  10. baselines/embedding-svm/embedding-svm.joblib +3 -0
  11. baselines/embedding-svm/test_predictions.csv +0 -0
  12. baselines/embedding-svm/validation_predictions.csv +0 -0
  13. baselines/logistic/logistic_tfidf.joblib +3 -0
  14. baselines/logistic/test_predictions.csv +0 -0
  15. baselines/logistic/validation_predictions.csv +0 -0
  16. baselines/xgboost/test_predictions.csv +0 -0
  17. baselines/xgboost/validation_predictions.csv +0 -0
  18. baselines/xgboost/xgboost_tfidf.joblib +3 -0
  19. report.json +704 -0
  20. transformer/checkpoint-1220/config.json +39 -0
  21. transformer/checkpoint-1220/model.safetensors +3 -0
  22. transformer/checkpoint-1220/optimizer.pt +3 -0
  23. transformer/checkpoint-1220/rng_state.pth +3 -0
  24. transformer/checkpoint-1220/scaler.pt +3 -0
  25. transformer/checkpoint-1220/scheduler.pt +3 -0
  26. transformer/checkpoint-1220/tokenizer.json +3 -0
  27. transformer/checkpoint-1220/tokenizer_config.json +15 -0
  28. transformer/checkpoint-1220/trainer_state.json +431 -0
  29. transformer/checkpoint-1220/training_args.bin +3 -0
  30. transformer/checkpoint-1525/config.json +39 -0
  31. transformer/checkpoint-1525/model.safetensors +3 -0
  32. transformer/checkpoint-1525/optimizer.pt +3 -0
  33. transformer/checkpoint-1525/rng_state.pth +3 -0
  34. transformer/checkpoint-1525/scaler.pt +3 -0
  35. transformer/checkpoint-1525/scheduler.pt +3 -0
  36. transformer/checkpoint-1525/tokenizer.json +3 -0
  37. transformer/checkpoint-1525/tokenizer_config.json +15 -0
  38. transformer/checkpoint-1525/trainer_state.json +535 -0
  39. transformer/checkpoint-1525/training_args.bin +3 -0
  40. transformer/checkpoint-305/config.json +39 -0
  41. transformer/checkpoint-305/model.safetensors +3 -0
  42. transformer/checkpoint-305/optimizer.pt +3 -0
  43. transformer/checkpoint-305/rng_state.pth +3 -0
  44. transformer/checkpoint-305/scaler.pt +3 -0
  45. transformer/checkpoint-305/scheduler.pt +3 -0
  46. transformer/checkpoint-305/tokenizer.json +3 -0
  47. transformer/checkpoint-305/tokenizer_config.json +15 -0
  48. transformer/checkpoint-305/trainer_state.json +140 -0
  49. transformer/checkpoint-305/training_args.bin +3 -0
  50. transformer/checkpoint-610/config.json +39 -0
.gitattributes CHANGED
@@ -33,3 +33,9 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ transformer/checkpoint-1220/tokenizer.json filter=lfs diff=lfs merge=lfs -text
37
+ transformer/checkpoint-1525/tokenizer.json filter=lfs diff=lfs merge=lfs -text
38
+ transformer/checkpoint-305/tokenizer.json filter=lfs diff=lfs merge=lfs -text
39
+ transformer/checkpoint-610/tokenizer.json filter=lfs diff=lfs merge=lfs -text
40
+ transformer/checkpoint-915/tokenizer.json filter=lfs diff=lfs merge=lfs -text
41
+ transformer/tokenizer.json filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,287 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ library_name: transformers
3
+ pipeline_tag: text-classification
4
+ base_model: FacebookAI/xlm-roberta-base
5
+ tags:
6
+ - text-classification
7
+ - binary-classification
8
+ - amis
9
+ - agriculture
10
+ language: multilingual
11
+ ---
12
+
13
+ # AMIS Commodity Classifier
14
+
15
+ This model repository contains artifacts from an AMIS commodity relevance classifier training run.
16
+ It includes the Transformer model, any configured TF-IDF or sentence-embedding baselines, prediction files, and the training report.
17
+
18
+ - Dataset: `faodl/amis-agri-utilization`
19
+ - Dataset subset: _none_
20
+ - Text column: `chunk_text`
21
+ - Label column: `label`
22
+ - Transformer: `FacebookAI/xlm-roberta-base`
23
+ - Generated at: `2026-05-25T19:23:29.605062+00:00`
24
+
25
+ ## Dataset Summary
26
+
27
+ | Split | Rows | Label 0 | Label 1 | Unique groups | Mean text length |
28
+ | --- | ---: | ---: | ---: | ---: | ---: |
29
+ | train | 4877 | 4347 | 530 | 2513 | 696.6 |
30
+ | validation | 978 | 899 | 79 | 538 | 690.6 |
31
+ | test | 1016 | 904 | 112 | 539 | 690.7 |
32
+
33
+ ## Threshold Comparison on Test Split
34
+
35
+ | Model | Threshold | Accuracy | Precision | Recall | F1 | ROC AUC | Average precision |
36
+ | --- | ---: | ---: | ---: | ---: | ---: | ---: | ---: |
37
+ | logistic_tfidf | 0.500 | 0.926 | 0.691 | 0.598 | 0.641 | 0.899 | 0.726 |
38
+ | logistic_tfidf | 0.608 | 0.930 | 0.902 | 0.411 | 0.564 | 0.899 | 0.726 |
39
+ | xgboost_tfidf | 0.500 | 0.924 | 1.000 | 0.312 | 0.476 | 0.892 | 0.692 |
40
+ | xgboost_tfidf | 0.177 | 0.918 | 0.663 | 0.527 | 0.587 | 0.892 | 0.692 |
41
+ | embedding-logistic_sentence_embeddings | 0.500 | 0.899 | 0.524 | 0.866 | 0.653 | 0.952 | 0.759 |
42
+ | embedding-logistic_sentence_embeddings | 0.616 | 0.929 | 0.632 | 0.857 | 0.727 | 0.952 | 0.759 |
43
+ | embedding-svm_sentence_embeddings | 0.500 | 0.941 | 0.771 | 0.661 | 0.712 | 0.952 | 0.743 |
44
+ | embedding-svm_sentence_embeddings | 0.276 | 0.935 | 0.667 | 0.821 | 0.736 | 0.952 | 0.743 |
45
+ | embedding-lightgbm_sentence_embeddings | 0.500 | 0.946 | 0.788 | 0.696 | 0.739 | 0.959 | 0.801 |
46
+ | embedding-lightgbm_sentence_embeddings | 0.052 | 0.933 | 0.657 | 0.821 | 0.730 | 0.959 | 0.801 |
47
+ | transformer | 0.500 | 0.950 | 0.748 | 0.821 | 0.783 | 0.951 | 0.785 |
48
+ | transformer | 0.616 | 0.950 | 0.748 | 0.821 | 0.783 | 0.951 | 0.785 |
49
+
50
+ ## Confusion Matrices on Test Split
51
+
52
+ Rows are true labels and columns are predicted labels.
53
+
54
+ ### logistic_tfidf at threshold 0.500
55
+
56
+ | True / Predicted | NOT_RELEVANT | RELEVANT |
57
+ | --- | ---: | ---: |
58
+ | NOT_RELEVANT | 874 | 30 |
59
+ | RELEVANT | 45 | 67 |
60
+
61
+ ### logistic_tfidf at threshold 0.608
62
+
63
+ | True / Predicted | NOT_RELEVANT | RELEVANT |
64
+ | --- | ---: | ---: |
65
+ | NOT_RELEVANT | 899 | 5 |
66
+ | RELEVANT | 66 | 46 |
67
+
68
+ ### xgboost_tfidf at threshold 0.500
69
+
70
+ | True / Predicted | NOT_RELEVANT | RELEVANT |
71
+ | --- | ---: | ---: |
72
+ | NOT_RELEVANT | 904 | 0 |
73
+ | RELEVANT | 77 | 35 |
74
+
75
+ ### xgboost_tfidf at threshold 0.177
76
+
77
+ | True / Predicted | NOT_RELEVANT | RELEVANT |
78
+ | --- | ---: | ---: |
79
+ | NOT_RELEVANT | 874 | 30 |
80
+ | RELEVANT | 53 | 59 |
81
+
82
+ ### embedding-logistic_sentence_embeddings at threshold 0.500
83
+
84
+ | True / Predicted | NOT_RELEVANT | RELEVANT |
85
+ | --- | ---: | ---: |
86
+ | NOT_RELEVANT | 816 | 88 |
87
+ | RELEVANT | 15 | 97 |
88
+
89
+ ### embedding-logistic_sentence_embeddings at threshold 0.616
90
+
91
+ | True / Predicted | NOT_RELEVANT | RELEVANT |
92
+ | --- | ---: | ---: |
93
+ | NOT_RELEVANT | 848 | 56 |
94
+ | RELEVANT | 16 | 96 |
95
+
96
+ ### embedding-svm_sentence_embeddings at threshold 0.500
97
+
98
+ | True / Predicted | NOT_RELEVANT | RELEVANT |
99
+ | --- | ---: | ---: |
100
+ | NOT_RELEVANT | 882 | 22 |
101
+ | RELEVANT | 38 | 74 |
102
+
103
+ ### embedding-svm_sentence_embeddings at threshold 0.276
104
+
105
+ | True / Predicted | NOT_RELEVANT | RELEVANT |
106
+ | --- | ---: | ---: |
107
+ | NOT_RELEVANT | 858 | 46 |
108
+ | RELEVANT | 20 | 92 |
109
+
110
+ ### embedding-lightgbm_sentence_embeddings at threshold 0.500
111
+
112
+ | True / Predicted | NOT_RELEVANT | RELEVANT |
113
+ | --- | ---: | ---: |
114
+ | NOT_RELEVANT | 883 | 21 |
115
+ | RELEVANT | 34 | 78 |
116
+
117
+ ### embedding-lightgbm_sentence_embeddings at threshold 0.052
118
+
119
+ | True / Predicted | NOT_RELEVANT | RELEVANT |
120
+ | --- | ---: | ---: |
121
+ | NOT_RELEVANT | 856 | 48 |
122
+ | RELEVANT | 20 | 92 |
123
+
124
+ ### transformer at threshold 0.500
125
+
126
+ | True / Predicted | NOT_RELEVANT | RELEVANT |
127
+ | --- | ---: | ---: |
128
+ | NOT_RELEVANT | 873 | 31 |
129
+ | RELEVANT | 20 | 92 |
130
+
131
+ ### transformer at threshold 0.616
132
+
133
+ | True / Predicted | NOT_RELEVANT | RELEVANT |
134
+ | --- | ---: | ---: |
135
+ | NOT_RELEVANT | 873 | 31 |
136
+ | RELEVANT | 20 | 92 |
137
+
138
+
139
+ ## Validation-Tuned Thresholds
140
+
141
+ - `logistic_tfidf`: threshold `0.608` (validation F1 `0.578`); test F1 change vs 0.5: `-0.077`.
142
+ - `xgboost_tfidf`: threshold `0.177` (validation F1 `0.581`); test F1 change vs 0.5: `+0.111`.
143
+ - `embedding-logistic_sentence_embeddings`: threshold `0.616` (validation F1 `0.728`); test F1 change vs 0.5: `+0.074`.
144
+ - `embedding-svm_sentence_embeddings`: threshold `0.276` (validation F1 `0.731`); test F1 change vs 0.5: `+0.024`.
145
+ - `embedding-lightgbm_sentence_embeddings`: threshold `0.052` (validation F1 `0.739`); test F1 change vs 0.5: `-0.009`.
146
+ - `transformer`: threshold `0.616` (validation F1 `0.807`); test F1 change vs 0.5: `+0.000`.
147
+
148
+ ## Artifacts
149
+
150
+ - `logistic_tfidf`: `/content/agri-utilization-classifier/baselines/logistic`
151
+ - `xgboost_tfidf`: `/content/agri-utilization-classifier/baselines/xgboost`
152
+ - `embedding-logistic_sentence_embeddings`: `/content/agri-utilization-classifier/baselines/embedding-logistic`
153
+ - `embedding-svm_sentence_embeddings`: `/content/agri-utilization-classifier/baselines/embedding-svm`
154
+ - `embedding-lightgbm_sentence_embeddings`: `/content/agri-utilization-classifier/baselines/embedding-lightgbm`
155
+ - `transformer`: `/content/agri-utilization-classifier/transformer`
156
+
157
+ ## Inference
158
+
159
+ Install the runtime dependencies:
160
+
161
+ ```bash
162
+ pip install transformers torch huggingface_hub pandas joblib scikit-learn xgboost sentence-transformers lightgbm
163
+ ```
164
+
165
+ ### Transformer
166
+
167
+ ```python
168
+ import torch
169
+ from transformers import AutoModelForSequenceClassification, AutoTokenizer
170
+
171
+ MODEL_ID = "faodl/agri-utilization-classifier"
172
+
173
+ texts = [
174
+ "Rice export prices increased after new procurement rules were announced.",
175
+ "The finance ministry released its monthly fuel tax bulletin.",
176
+ ]
177
+
178
+ tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, subfolder="transformer")
179
+ model = AutoModelForSequenceClassification.from_pretrained(MODEL_ID, subfolder="transformer")
180
+ threshold = float(getattr(model.config, "threshold", 0.5))
181
+
182
+ encoded = tokenizer(
183
+ texts,
184
+ truncation=True,
185
+ padding=True,
186
+ max_length=256,
187
+ return_tensors="pt",
188
+ )
189
+
190
+ with torch.no_grad():
191
+     logits = model(**encoded).logits
192
+     probabilities = torch.softmax(logits, dim=-1)[:, 1].tolist()
193
+
194
+ for text, probability in zip(texts, probabilities):
195
+     label = model.config.id2label[int(probability >= threshold)]
196
+     print({"text": text, "probability_positive": probability, "label": label})
197
+ ```
198
+
199
+ ### TF-IDF Baselines
200
+
201
+ Available baseline names in this run: "logistic", "xgboost".
202
+
203
+ ```python
204
+ import json
205
+ import joblib
206
+ from huggingface_hub import hf_hub_download
207
+
208
+ MODEL_ID = "faodl/agri-utilization-classifier"
209
+ BASELINE = "logistic"
210
+
211
+ texts = [
212
+ "Maize production forecasts were revised after delayed rains.",
213
+ "The central bank published new exchange rate statistics.",
214
+ ]
215
+
216
+ model_path = hf_hub_download(
217
+ repo_id=MODEL_ID,
218
+ repo_type="model",
219
+ filename=f"baselines/{BASELINE}/{BASELINE}_tfidf.joblib",
220
+ )
221
+ report_path = hf_hub_download(
222
+ repo_id=MODEL_ID,
223
+ repo_type="model",
224
+ filename="report.json",
225
+ )
226
+
227
+ pipeline = joblib.load(model_path)
228
+ with open(report_path, encoding="utf-8") as handle:
229
+     report = json.load(handle)
230
+
231
+ threshold = next(
232
+ result["validation_best_threshold"]["threshold"]
233
+ for result in report["results"]
234
+ if result["model_type"] == f"{BASELINE}_tfidf"
235
+ )
236
+
237
+ probabilities = pipeline.predict_proba(texts)[:, 1]
238
+ for text, probability in zip(texts, probabilities):
239
+     label = "RELEVANT" if probability >= threshold else "NOT_RELEVANT"
240
+     print({"text": text, "probability_positive": float(probability), "label": label})
241
+ ```
242
+
243
+ ### Sentence-Embedding Baselines
244
+
245
+ Available embedding baseline names in this run: "embedding-logistic", "embedding-svm", "embedding-lightgbm".
246
+
247
+ ```python
248
+ import joblib
249
+ from huggingface_hub import hf_hub_download
250
+ from sentence_transformers import SentenceTransformer
251
+
252
+ MODEL_ID = "faodl/agri-utilization-classifier"
253
+ BASELINE = "embedding-logistic"
254
+
255
+ texts = [
256
+ "Wheat export inspections rose as demand from importers increased.",
257
+ "The sports ministry announced a new stadium renovation plan.",
258
+ ]
259
+
260
+ model_path = hf_hub_download(
261
+ repo_id=MODEL_ID,
262
+ repo_type="model",
263
+ filename=f"baselines/{BASELINE}/{BASELINE}.joblib",
264
+ )
265
+ artifact = joblib.load(model_path)
266
+ embedding_model = SentenceTransformer(artifact["embedding_model_name"])
267
+ embeddings = embedding_model.encode(
268
+ texts,
269
+ batch_size=artifact.get("embedding_batch_size", 64),
270
+ convert_to_numpy=True,
271
+ normalize_embeddings=artifact.get("normalize_embeddings", True),
272
+ )
273
+ probabilities = artifact["classifier"].predict_proba(embeddings)[:, 1]
274
+ threshold = artifact["validation_best_threshold"]["threshold"]
275
+
276
+ for text, probability in zip(texts, probabilities):
277
+     label = "RELEVANT" if probability >= threshold else "NOT_RELEVANT"
278
+     print({"text": text, "probability_positive": float(probability), "label": label})
279
+ ```
280
+
281
+ ## Files
282
+
283
+ - `REPORT.md`: Markdown report for this training run.
284
+ - `report.json`: Machine-readable report containing metrics and thresholds.
285
+ - `transformer/`: Fine-tuned Transformer artifacts, when Transformer training is enabled.
286
+ - `baselines/`: TF-IDF and sentence-embedding baseline artifacts, when baseline training is enabled.
287
+ - `*/validation_predictions.csv` and `*/test_predictions.csv`: Split-level predictions.
REPORT.md ADDED
@@ -0,0 +1,140 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # AMIS Commodity Classifier Training Report
2
+
3
+ - Dataset: `faodl/amis-agri-utilization`
4
+ - Dataset subset: _none_
5
+ - Text column: `chunk_text`
6
+ - Label column: `label`
7
+ - Transformer: `FacebookAI/xlm-roberta-base`
8
+ - Generated at: `2026-05-25T19:23:29.605062+00:00`
9
+
10
+ ## Dataset Summary
11
+
12
+ | Split | Rows | Label 0 | Label 1 | Unique groups | Mean text length |
13
+ | --- | ---: | ---: | ---: | ---: | ---: |
14
+ | train | 4877 | 4347 | 530 | 2513 | 696.6 |
15
+ | validation | 978 | 899 | 79 | 538 | 690.6 |
16
+ | test | 1016 | 904 | 112 | 539 | 690.7 |
17
+
18
+ ## Threshold Comparison on Test Split
19
+
20
+ | Model | Threshold | Accuracy | Precision | Recall | F1 | ROC AUC | Average precision |
21
+ | --- | ---: | ---: | ---: | ---: | ---: | ---: | ---: |
22
+ | logistic_tfidf | 0.500 | 0.926 | 0.691 | 0.598 | 0.641 | 0.899 | 0.726 |
23
+ | logistic_tfidf | 0.608 | 0.930 | 0.902 | 0.411 | 0.564 | 0.899 | 0.726 |
24
+ | xgboost_tfidf | 0.500 | 0.924 | 1.000 | 0.312 | 0.476 | 0.892 | 0.692 |
25
+ | xgboost_tfidf | 0.177 | 0.918 | 0.663 | 0.527 | 0.587 | 0.892 | 0.692 |
26
+ | embedding-logistic_sentence_embeddings | 0.500 | 0.899 | 0.524 | 0.866 | 0.653 | 0.952 | 0.759 |
27
+ | embedding-logistic_sentence_embeddings | 0.616 | 0.929 | 0.632 | 0.857 | 0.727 | 0.952 | 0.759 |
28
+ | embedding-svm_sentence_embeddings | 0.500 | 0.941 | 0.771 | 0.661 | 0.712 | 0.952 | 0.743 |
29
+ | embedding-svm_sentence_embeddings | 0.276 | 0.935 | 0.667 | 0.821 | 0.736 | 0.952 | 0.743 |
30
+ | embedding-lightgbm_sentence_embeddings | 0.500 | 0.946 | 0.788 | 0.696 | 0.739 | 0.959 | 0.801 |
31
+ | embedding-lightgbm_sentence_embeddings | 0.052 | 0.933 | 0.657 | 0.821 | 0.730 | 0.959 | 0.801 |
32
+ | transformer | 0.500 | 0.950 | 0.748 | 0.821 | 0.783 | 0.951 | 0.785 |
33
+ | transformer | 0.616 | 0.950 | 0.748 | 0.821 | 0.783 | 0.951 | 0.785 |
34
+
35
+ ## Confusion Matrices on Test Split
36
+
37
+ Rows are true labels and columns are predicted labels.
38
+
39
+ ### logistic_tfidf at threshold 0.500
40
+
41
+ | True / Predicted | NOT_RELEVANT | RELEVANT |
42
+ | --- | ---: | ---: |
43
+ | NOT_RELEVANT | 874 | 30 |
44
+ | RELEVANT | 45 | 67 |
45
+
46
+ ### logistic_tfidf at threshold 0.608
47
+
48
+ | True / Predicted | NOT_RELEVANT | RELEVANT |
49
+ | --- | ---: | ---: |
50
+ | NOT_RELEVANT | 899 | 5 |
51
+ | RELEVANT | 66 | 46 |
52
+
53
+ ### xgboost_tfidf at threshold 0.500
54
+
55
+ | True / Predicted | NOT_RELEVANT | RELEVANT |
56
+ | --- | ---: | ---: |
57
+ | NOT_RELEVANT | 904 | 0 |
58
+ | RELEVANT | 77 | 35 |
59
+
60
+ ### xgboost_tfidf at threshold 0.177
61
+
62
+ | True / Predicted | NOT_RELEVANT | RELEVANT |
63
+ | --- | ---: | ---: |
64
+ | NOT_RELEVANT | 874 | 30 |
65
+ | RELEVANT | 53 | 59 |
66
+
67
+ ### embedding-logistic_sentence_embeddings at threshold 0.500
68
+
69
+ | True / Predicted | NOT_RELEVANT | RELEVANT |
70
+ | --- | ---: | ---: |
71
+ | NOT_RELEVANT | 816 | 88 |
72
+ | RELEVANT | 15 | 97 |
73
+
74
+ ### embedding-logistic_sentence_embeddings at threshold 0.616
75
+
76
+ | True / Predicted | NOT_RELEVANT | RELEVANT |
77
+ | --- | ---: | ---: |
78
+ | NOT_RELEVANT | 848 | 56 |
79
+ | RELEVANT | 16 | 96 |
80
+
81
+ ### embedding-svm_sentence_embeddings at threshold 0.500
82
+
83
+ | True / Predicted | NOT_RELEVANT | RELEVANT |
84
+ | --- | ---: | ---: |
85
+ | NOT_RELEVANT | 882 | 22 |
86
+ | RELEVANT | 38 | 74 |
87
+
88
+ ### embedding-svm_sentence_embeddings at threshold 0.276
89
+
90
+ | True / Predicted | NOT_RELEVANT | RELEVANT |
91
+ | --- | ---: | ---: |
92
+ | NOT_RELEVANT | 858 | 46 |
93
+ | RELEVANT | 20 | 92 |
94
+
95
+ ### embedding-lightgbm_sentence_embeddings at threshold 0.500
96
+
97
+ | True / Predicted | NOT_RELEVANT | RELEVANT |
98
+ | --- | ---: | ---: |
99
+ | NOT_RELEVANT | 883 | 21 |
100
+ | RELEVANT | 34 | 78 |
101
+
102
+ ### embedding-lightgbm_sentence_embeddings at threshold 0.052
103
+
104
+ | True / Predicted | NOT_RELEVANT | RELEVANT |
105
+ | --- | ---: | ---: |
106
+ | NOT_RELEVANT | 856 | 48 |
107
+ | RELEVANT | 20 | 92 |
108
+
109
+ ### transformer at threshold 0.500
110
+
111
+ | True / Predicted | NOT_RELEVANT | RELEVANT |
112
+ | --- | ---: | ---: |
113
+ | NOT_RELEVANT | 873 | 31 |
114
+ | RELEVANT | 20 | 92 |
115
+
116
+ ### transformer at threshold 0.616
117
+
118
+ | True / Predicted | NOT_RELEVANT | RELEVANT |
119
+ | --- | ---: | ---: |
120
+ | NOT_RELEVANT | 873 | 31 |
121
+ | RELEVANT | 20 | 92 |
122
+
123
+
124
+ ## Validation-Tuned Thresholds
125
+
126
+ - `logistic_tfidf`: threshold `0.608` (validation F1 `0.578`); test F1 change vs 0.5: `-0.077`.
127
+ - `xgboost_tfidf`: threshold `0.177` (validation F1 `0.581`); test F1 change vs 0.5: `+0.111`.
128
+ - `embedding-logistic_sentence_embeddings`: threshold `0.616` (validation F1 `0.728`); test F1 change vs 0.5: `+0.074`.
129
+ - `embedding-svm_sentence_embeddings`: threshold `0.276` (validation F1 `0.731`); test F1 change vs 0.5: `+0.024`.
130
+ - `embedding-lightgbm_sentence_embeddings`: threshold `0.052` (validation F1 `0.739`); test F1 change vs 0.5: `-0.009`.
131
+ - `transformer`: threshold `0.616` (validation F1 `0.807`); test F1 change vs 0.5: `+0.000`.
132
+
133
+ ## Artifacts
134
+
135
+ - `logistic_tfidf`: `/content/agri-utilization-classifier/baselines/logistic`
136
+ - `xgboost_tfidf`: `/content/agri-utilization-classifier/baselines/xgboost`
137
+ - `embedding-logistic_sentence_embeddings`: `/content/agri-utilization-classifier/baselines/embedding-logistic`
138
+ - `embedding-svm_sentence_embeddings`: `/content/agri-utilization-classifier/baselines/embedding-svm`
139
+ - `embedding-lightgbm_sentence_embeddings`: `/content/agri-utilization-classifier/baselines/embedding-lightgbm`
140
+ - `transformer`: `/content/agri-utilization-classifier/transformer`
baselines/embedding-lightgbm/embedding-lightgbm.joblib ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3a14be333902e726d49155cf98ec689843edfa4320b39724da54a187bea078e8
3
+ size 1467460
baselines/embedding-lightgbm/test_predictions.csv ADDED
The diff for this file is too large to render. See raw diff
 
baselines/embedding-lightgbm/validation_predictions.csv ADDED
The diff for this file is too large to render. See raw diff
 
baselines/embedding-logistic/embedding-logistic.joblib ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:596282c69402bd7479f4057afeaeeec5cc81d9c13bede61569f3be96207798f0
3
+ size 4287
baselines/embedding-logistic/test_predictions.csv ADDED
The diff for this file is too large to render. See raw diff
 
baselines/embedding-logistic/validation_predictions.csv ADDED
The diff for this file is too large to render. See raw diff
 
baselines/embedding-svm/embedding-svm.joblib ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d4dcb68c9d78767b36ec44c943e7085a53ccbf4fc61e5568acaf2d3cf442f72e
3
+ size 11696
baselines/embedding-svm/test_predictions.csv ADDED
The diff for this file is too large to render. See raw diff
 
baselines/embedding-svm/validation_predictions.csv ADDED
The diff for this file is too large to render. See raw diff
 
baselines/logistic/logistic_tfidf.joblib ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:988b232ccc0c55fa1116c0885058e6200246e9dbe050debf6f5edfa81e0438e7
3
+ size 2452308
baselines/logistic/test_predictions.csv ADDED
The diff for this file is too large to render. See raw diff
 
baselines/logistic/validation_predictions.csv ADDED
The diff for this file is too large to render. See raw diff
 
baselines/xgboost/test_predictions.csv ADDED
The diff for this file is too large to render. See raw diff
 
baselines/xgboost/validation_predictions.csv ADDED
The diff for this file is too large to render. See raw diff
 
baselines/xgboost/xgboost_tfidf.joblib ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:75dae90ae561b6e87b2fd736393208127db3493eb3df7a2232490a3a60238d1b
3
+ size 2494551
report.json ADDED
@@ -0,0 +1,704 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "created_at": "2026-05-25T19:23:29.605062+00:00",
3
+ "config": {
4
+ "hf_dataset": "faodl/amis-agri-utilization",
5
+ "hf_subset": null,
6
+ "train_split": "train",
7
+ "validation_split": "validation",
8
+ "test_split": "test",
9
+ "text_col": "chunk_text",
10
+ "label_col": "label",
11
+ "group_col": "id",
12
+ "id_col": "chunk_id",
13
+ "model_name": "FacebookAI/xlm-roberta-base",
14
+ "output_dir": "/content/agri-utilization-classifier",
15
+ "max_length": 256,
16
+ "learning_rate": 2e-05,
17
+ "weight_decay": 0.01,
18
+ "num_train_epochs": 5.0,
19
+ "per_device_train_batch_size": 16,
20
+ "per_device_eval_batch_size": 32,
21
+ "gradient_accumulation_steps": 1,
22
+ "warmup_ratio": 0.1,
23
+ "early_stopping_patience": 2,
24
+ "seed": 42,
25
+ "metric_for_best_model": "f1",
26
+ "skip_transformer": false,
27
+ "skip_baselines": false,
28
+ "baseline_models": [
29
+ "logistic",
30
+ "xgboost",
31
+ "embedding-logistic",
32
+ "embedding-svm",
33
+ "embedding-lightgbm"
34
+ ],
35
+ "tfidf_max_features": 50000,
36
+ "embedding_model_name": "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2",
37
+ "embedding_batch_size": 64,
38
+ "positive_label_name": "RELEVANT",
39
+ "negative_label_name": "NOT_RELEVANT",
40
+ "push_to_hub": true,
41
+ "hub_model_id": "faodl/agri-utilization-classifier",
42
+ "hub_private_repo": false
43
+ },
44
+ "dataset_summary": {
45
+ "train": {
46
+ "rows": 4877,
47
+ "labels": {
48
+ "0": 4347,
49
+ "1": 530
50
+ },
51
+ "unique_groups": 2513,
52
+ "text_length_mean": 696.6221037523068,
53
+ "text_length_median": 794.0
54
+ },
55
+ "validation": {
56
+ "rows": 978,
57
+ "labels": {
58
+ "0": 899,
59
+ "1": 79
60
+ },
61
+ "unique_groups": 538,
62
+ "text_length_mean": 690.6196319018405,
63
+ "text_length_median": 794.0
64
+ },
65
+ "test": {
66
+ "rows": 1016,
67
+ "labels": {
68
+ "0": 904,
69
+ "1": 112
70
+ },
71
+ "unique_groups": 539,
72
+ "text_length_mean": 690.6929133858267,
73
+ "text_length_median": 794.0
74
+ }
75
+ },
76
+ "results": [
77
+ {
78
+ "model_type": "logistic_tfidf",
79
+ "model_name": "logistic",
80
+ "artifact_dir": "/content/agri-utilization-classifier/baselines/logistic",
81
+ "artifact_file": "/content/agri-utilization-classifier/baselines/logistic/logistic_tfidf.joblib",
82
+ "validation_best_threshold": {
83
+ "threshold": 0.6076606929552563,
84
+ "f1": 0.5777777777777778,
85
+ "precision": 0.6964285714285714,
86
+ "recall": 0.4936708860759494
87
+ },
88
+ "test_default_0_5": {
89
+ "threshold": 0.5,
90
+ "accuracy": 0.9261811023622047,
91
+ "precision": 0.6907216494845361,
92
+ "recall": 0.5982142857142857,
93
+ "f1": 0.6411483253588517,
94
+ "confusion_matrix": [
95
+ [
96
+ 874,
97
+ 30
98
+ ],
99
+ [
100
+ 45,
101
+ 67
102
+ ]
103
+ ],
104
+ "classification_report": {
105
+ "NOT_RELEVANT": {
106
+ "precision": 0.9510337323177367,
107
+ "recall": 0.9668141592920354,
108
+ "f1-score": 0.9588590235874932,
109
+ "support": 904.0
110
+ },
111
+ "RELEVANT": {
112
+ "precision": 0.6907216494845361,
113
+ "recall": 0.5982142857142857,
114
+ "f1-score": 0.6411483253588517,
115
+ "support": 112.0
116
+ },
117
+ "accuracy": 0.9261811023622047,
118
+ "macro avg": {
119
+ "precision": 0.8208776909011364,
120
+ "recall": 0.7825142225031605,
121
+ "f1-score": 0.8000036744731724,
122
+ "support": 1016.0
123
+ },
124
+ "weighted avg": {
125
+ "precision": 0.9223379121628957,
126
+ "recall": 0.9261811023622047,
127
+ "f1-score": 0.9238357970111075,
128
+ "support": 1016.0
129
+ }
130
+ },
131
+ "roc_auc": 0.8990004740834386,
132
+ "average_precision": 0.7262348311700503
133
+ },
134
+ "test_optimal_threshold": {
135
+ "threshold": 0.6076606929552563,
136
+ "accuracy": 0.9301181102362205,
137
+ "precision": 0.9019607843137255,
138
+ "recall": 0.4107142857142857,
139
+ "f1": 0.5644171779141104,
140
+ "confusion_matrix": [
141
+ [
142
+ 899,
143
+ 5
144
+ ],
145
+ [
146
+ 66,
147
+ 46
148
+ ]
149
+ ],
150
+ "classification_report": {
151
+ "NOT_RELEVANT": {
152
+ "precision": 0.9316062176165804,
153
+ "recall": 0.9944690265486725,
154
+ "f1-score": 0.962011771000535,
155
+ "support": 904.0
156
+ },
157
+ "RELEVANT": {
158
+ "precision": 0.9019607843137255,
159
+ "recall": 0.4107142857142857,
160
+ "f1-score": 0.5644171779141104,
161
+ "support": 112.0
162
+ },
163
+ "accuracy": 0.9301181102362205,
164
+ "macro avg": {
165
+ "precision": 0.9167835009651529,
166
+ "recall": 0.7025916561314791,
167
+ "f1-score": 0.7632144744573227,
168
+ "support": 1016.0
169
+ },
170
+ "weighted avg": {
171
+ "precision": 0.9283382170950057,
172
+ "recall": 0.9301181102362205,
173
+ "f1-score": 0.9181824457784095,
174
+ "support": 1016.0
175
+ }
176
+ },
177
+ "roc_auc": 0.8990004740834386,
178
+ "average_precision": 0.7262348311700503
179
+ }
180
+ },
181
+ {
182
+ "model_type": "xgboost_tfidf",
183
+ "model_name": "xgboost",
184
+ "artifact_dir": "/content/agri-utilization-classifier/baselines/xgboost",
185
+ "artifact_file": "/content/agri-utilization-classifier/baselines/xgboost/xgboost_tfidf.joblib",
186
+ "validation_best_threshold": {
187
+ "threshold": 0.17728303372859955,
188
+ "f1": 0.5806451612903226,
189
+ "precision": 0.5921052631578947,
190
+ "recall": 0.569620253164557
191
+ },
192
+ "test_default_0_5": {
193
+ "threshold": 0.5,
194
+ "accuracy": 0.9242125984251969,
195
+ "precision": 1.0,
196
+ "recall": 0.3125,
197
+ "f1": 0.47619047619047616,
198
+ "confusion_matrix": [
199
+ [
200
+ 904,
201
+ 0
202
+ ],
203
+ [
204
+ 77,
205
+ 35
206
+ ]
207
+ ],
208
+ "classification_report": {
209
+ "NOT_RELEVANT": {
210
+ "precision": 0.9215086646279307,
211
+ "recall": 1.0,
212
+ "f1-score": 0.9591511936339523,
213
+ "support": 904.0
214
+ },
215
+ "RELEVANT": {
216
+ "precision": 1.0,
217
+ "recall": 0.3125,
218
+ "f1-score": 0.47619047619047616,
219
+ "support": 112.0
220
+ },
221
+ "accuracy": 0.9242125984251969,
222
+ "macro avg": {
223
+ "precision": 0.9607543323139653,
224
+ "recall": 0.65625,
225
+ "f1-score": 0.7176708349122143,
226
+ "support": 1016.0
227
+ },
228
+ "weighted avg": {
229
+ "precision": 0.9301612527791825,
230
+ "recall": 0.9242125984251969,
231
+ "f1-score": 0.905911429506325,
232
+ "support": 1016.0
233
+ }
234
+ },
235
+ "roc_auc": 0.8921114491150443,
236
+ "average_precision": 0.6916666494483661
237
+ },
238
+ "test_optimal_threshold": {
239
+ "threshold": 0.17728303372859955,
240
+ "accuracy": 0.9183070866141733,
241
+ "precision": 0.6629213483146067,
242
+ "recall": 0.5267857142857143,
243
+ "f1": 0.5870646766169154,
244
+ "confusion_matrix": [
245
+ [
246
+ 874,
247
+ 30
248
+ ],
249
+ [
250
+ 53,
251
+ 59
252
+ ]
253
+ ],
254
+ "classification_report": {
255
+ "NOT_RELEVANT": {
256
+ "precision": 0.9428263214670982,
257
+ "recall": 0.9668141592920354,
258
+ "f1-score": 0.9546695794647734,
259
+ "support": 904.0
260
+ },
261
+ "RELEVANT": {
262
+ "precision": 0.6629213483146067,
263
+ "recall": 0.5267857142857143,
264
+ "f1-score": 0.5870646766169154,
265
+ "support": 112.0
266
+ },
267
+ "accuracy": 0.9183070866141733,
268
+ "macro avg": {
269
+ "precision": 0.8028738348908524,
270
+ "recall": 0.7467999367888749,
271
+ "f1-score": 0.7708671280408443,
272
+ "support": 1016.0
273
+ },
274
+ "weighted avg": {
275
+ "precision": 0.9119706551353274,
276
+ "recall": 0.9183070866141733,
277
+ "f1-score": 0.9141462043476867,
278
+ "support": 1016.0
279
+ }
280
+ },
281
+ "roc_auc": 0.8921114491150443,
282
+ "average_precision": 0.6916666494483661
283
+ }
284
+ },
285
+ {
286
+ "model_type": "embedding-logistic_sentence_embeddings",
287
+ "model_name": "logistic",
288
+ "embedding_model_name": "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2",
289
+ "artifact_dir": "/content/agri-utilization-classifier/baselines/embedding-logistic",
290
+ "artifact_file": "/content/agri-utilization-classifier/baselines/embedding-logistic/embedding-logistic.joblib",
291
+ "validation_best_threshold": {
292
+ "threshold": 0.616087721531811,
293
+ "f1": 0.7282051282051282,
294
+ "precision": 0.6120689655172413,
295
+ "recall": 0.8987341772151899
296
+ },
297
+ "test_default_0_5": {
298
+ "threshold": 0.5,
299
+ "accuracy": 0.8986220472440944,
300
+ "precision": 0.5243243243243243,
301
+ "recall": 0.8660714285714286,
302
+ "f1": 0.6531986531986532,
303
+ "confusion_matrix": [
304
+ [
305
+ 816,
306
+ 88
307
+ ],
308
+ [
309
+ 15,
310
+ 97
311
+ ]
312
+ ],
313
+ "classification_report": {
314
+ "NOT_RELEVANT": {
315
+ "precision": 0.9819494584837545,
316
+ "recall": 0.9026548672566371,
317
+ "f1-score": 0.9406340057636887,
318
+ "support": 904.0
319
+ },
320
+ "RELEVANT": {
321
+ "precision": 0.5243243243243243,
322
+ "recall": 0.8660714285714286,
323
+ "f1-score": 0.6531986531986532,
324
+ "support": 112.0
325
+ },
326
+ "accuracy": 0.8986220472440944,
327
+ "macro avg": {
328
+ "precision": 0.7531368914040394,
329
+ "recall": 0.8843631479140328,
330
+ "f1-score": 0.796916329481171,
331
+ "support": 1016.0
332
+ },
333
+ "weighted avg": {
334
+ "precision": 0.9315025933008252,
335
+ "recall": 0.8986220472440944,
336
+ "f1-score": 0.9089482188667557,
337
+ "support": 1016.0
338
+ }
339
+ },
340
+ "roc_auc": 0.9523842446270544,
341
+ "average_precision": 0.7588349048416645
342
+ },
343
+ "test_optimal_threshold": {
344
+ "threshold": 0.616087721531811,
345
+ "accuracy": 0.9291338582677166,
346
+ "precision": 0.631578947368421,
347
+ "recall": 0.8571428571428571,
348
+ "f1": 0.7272727272727273,
349
+ "confusion_matrix": [
350
+ [
351
+ 848,
352
+ 56
353
+ ],
354
+ [
355
+ 16,
356
+ 96
357
+ ]
358
+ ],
359
+ "classification_report": {
360
+ "NOT_RELEVANT": {
361
+ "precision": 0.9814814814814815,
362
+ "recall": 0.9380530973451328,
363
+ "f1-score": 0.9592760180995475,
364
+ "support": 904.0
365
+ },
366
+ "RELEVANT": {
367
+ "precision": 0.631578947368421,
368
+ "recall": 0.8571428571428571,
369
+ "f1-score": 0.7272727272727273,
370
+ "support": 112.0
371
+ },
372
+ "accuracy": 0.9291338582677166,
373
+ "macro avg": {
374
+ "precision": 0.8065302144249513,
375
+ "recall": 0.8975979772439949,
376
+ "f1-score": 0.8432743726861374,
377
+ "support": 1016.0
378
+ },
379
+ "weighted avg": {
380
+ "precision": 0.9429095485871283,
381
+ "recall": 0.9291338582677166,
382
+ "f1-score": 0.9337008521816303,
383
+ "support": 1016.0
384
+ }
385
+ },
386
+ "roc_auc": 0.9523842446270544,
387
+ "average_precision": 0.7588349048416645
388
+ }
389
+ },
390
+ {
391
+ "model_type": "embedding-svm_sentence_embeddings",
392
+ "model_name": "svm",
393
+ "embedding_model_name": "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2",
394
+ "artifact_dir": "/content/agri-utilization-classifier/baselines/embedding-svm",
395
+ "artifact_file": "/content/agri-utilization-classifier/baselines/embedding-svm/embedding-svm.joblib",
396
+ "validation_best_threshold": {
397
+ "threshold": 0.27629376276966117,
398
+ "f1": 0.7314285714285714,
399
+ "precision": 0.6666666666666666,
400
+ "recall": 0.810126582278481
401
+ },
402
+ "test_default_0_5": {
403
+ "threshold": 0.5,
404
+ "accuracy": 0.9409448818897638,
405
+ "precision": 0.7708333333333334,
406
+ "recall": 0.6607142857142857,
407
+ "f1": 0.7115384615384616,
408
+ "confusion_matrix": [
409
+ [
410
+ 882,
411
+ 22
412
+ ],
413
+ [
414
+ 38,
415
+ 74
416
+ ]
417
+ ],
418
+ "classification_report": {
419
+ "NOT_RELEVANT": {
420
+ "precision": 0.9586956521739131,
421
+ "recall": 0.9756637168141593,
422
+ "f1-score": 0.9671052631578947,
423
+ "support": 904.0
424
+ },
425
+ "RELEVANT": {
426
+ "precision": 0.7708333333333334,
427
+ "recall": 0.6607142857142857,
428
+ "f1-score": 0.7115384615384616,
429
+ "support": 112.0
430
+ },
431
+ "accuracy": 0.9409448818897638,
432
+ "macro avg": {
433
+ "precision": 0.8647644927536232,
434
+ "recall": 0.8181890012642226,
435
+ "f1-score": 0.8393218623481782,
436
+ "support": 1016.0
437
+ },
438
+ "weighted avg": {
439
+ "precision": 0.9379864201757389,
440
+ "recall": 0.9409448818897638,
441
+ "f1-score": 0.9389325448691382,
442
+ "support": 1016.0
443
+ }
444
+ },
445
+ "roc_auc": 0.9517817635903919,
446
+ "average_precision": 0.743247391124005
447
+ },
448
+ "test_optimal_threshold": {
449
+ "threshold": 0.27629376276966117,
450
+ "accuracy": 0.9350393700787402,
451
+ "precision": 0.6666666666666666,
452
+ "recall": 0.8214285714285714,
453
+ "f1": 0.736,
454
+ "confusion_matrix": [
455
+ [
456
+ 858,
457
+ 46
458
+ ],
459
+ [
460
+ 20,
461
+ 92
462
+ ]
463
+ ],
464
+ "classification_report": {
465
+ "NOT_RELEVANT": {
466
+ "precision": 0.9772209567198178,
467
+ "recall": 0.9491150442477876,
468
+ "f1-score": 0.9629629629629629,
469
+ "support": 904.0
470
+ },
471
+ "RELEVANT": {
472
+ "precision": 0.6666666666666666,
473
+ "recall": 0.8214285714285714,
474
+ "f1-score": 0.736,
475
+ "support": 112.0
476
+ },
477
+ "accuracy": 0.9350393700787402,
478
+ "macro avg": {
479
+ "precision": 0.8219438116932423,
480
+ "recall": 0.8852718078381795,
481
+ "f1-score": 0.8494814814814815,
482
+ "support": 1016.0
483
+ },
484
+ "weighted avg": {
485
+ "precision": 0.9429866255328562,
486
+ "recall": 0.9350393700787402,
487
+ "f1-score": 0.9379434237386993,
488
+ "support": 1016.0
489
+ }
490
+ },
491
+ "roc_auc": 0.9517817635903919,
492
+ "average_precision": 0.743247391124005
493
+ }
494
+ },
495
+ {
496
+ "model_type": "embedding-lightgbm_sentence_embeddings",
497
+ "model_name": "lightgbm",
498
+ "embedding_model_name": "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2",
499
+ "artifact_dir": "/content/agri-utilization-classifier/baselines/embedding-lightgbm",
500
+ "artifact_file": "/content/agri-utilization-classifier/baselines/embedding-lightgbm/embedding-lightgbm.joblib",
501
+ "validation_best_threshold": {
502
+ "threshold": 0.05244099185733503,
503
+ "f1": 0.7386363636363636,
504
+ "precision": 0.6701030927835051,
505
+ "recall": 0.8227848101265823
506
+ },
507
+ "test_default_0_5": {
508
+ "threshold": 0.5,
509
+ "accuracy": 0.9458661417322834,
510
+ "precision": 0.7878787878787878,
511
+ "recall": 0.6964285714285714,
512
+ "f1": 0.7393364928909952,
513
+ "confusion_matrix": [
514
+ [
515
+ 883,
516
+ 21
517
+ ],
518
+ [
519
+ 34,
520
+ 78
521
+ ]
522
+ ],
523
+ "classification_report": {
524
+ "NOT_RELEVANT": {
525
+ "precision": 0.9629225736095965,
526
+ "recall": 0.9767699115044248,
527
+ "f1-score": 0.9697968149368479,
528
+ "support": 904.0
529
+ },
530
+ "RELEVANT": {
531
+ "precision": 0.7878787878787878,
532
+ "recall": 0.6964285714285714,
533
+ "f1-score": 0.7393364928909952,
534
+ "support": 112.0
535
+ },
536
+ "accuracy": 0.9458661417322834,
537
+ "macro avg": {
538
+ "precision": 0.8754006807441922,
539
+ "recall": 0.8365992414664981,
540
+ "f1-score": 0.8545666539139216,
541
+ "support": 1016.0
542
+ },
543
+ "weighted avg": {
544
+ "precision": 0.9436264082534445,
545
+ "recall": 0.9458661417322834,
546
+ "f1-score": 0.9443917400656515,
547
+ "support": 1016.0
548
+ }
549
+ },
550
+ "roc_auc": 0.9585078223767383,
551
+ "average_precision": 0.8011064601086128
552
+ },
553
+ "test_optimal_threshold": {
554
+ "threshold": 0.05244099185733503,
555
+ "accuracy": 0.9330708661417323,
556
+ "precision": 0.6571428571428571,
557
+ "recall": 0.8214285714285714,
558
+ "f1": 0.7301587301587301,
559
+ "confusion_matrix": [
560
+ [
561
+ 856,
562
+ 48
563
+ ],
564
+ [
565
+ 20,
566
+ 92
567
+ ]
568
+ ],
569
+ "classification_report": {
570
+ "NOT_RELEVANT": {
571
+ "precision": 0.9771689497716894,
572
+ "recall": 0.9469026548672567,
573
+ "f1-score": 0.9617977528089887,
574
+ "support": 904.0
575
+ },
576
+ "RELEVANT": {
577
+ "precision": 0.6571428571428571,
578
+ "recall": 0.8214285714285714,
579
+ "f1-score": 0.7301587301587301,
580
+ "support": 112.0
581
+ },
582
+ "accuracy": 0.9330708661417323,
583
+ "macro avg": {
584
+ "precision": 0.8171559034572733,
585
+ "recall": 0.8841656131479141,
586
+ "f1-score": 0.8459782414838595,
587
+ "support": 1016.0
588
+ },
589
+ "weighted avg": {
590
+ "precision": 0.9418904828677237,
591
+ "recall": 0.9330708661417323,
592
+ "f1-score": 0.936262742438094,
593
+ "support": 1016.0
594
+ }
595
+ },
596
+ "roc_auc": 0.9585078223767383,
597
+ "average_precision": 0.8011064601086128
598
+ }
599
+ },
600
+ {
601
+ "model_type": "transformer",
602
+ "model_name": "FacebookAI/xlm-roberta-base",
603
+ "artifact_dir": "/content/agri-utilization-classifier/transformer",
604
+ "validation_best_threshold": {
605
+ "threshold": 0.6156440377235413,
606
+ "f1": 0.8072289156626505,
607
+ "precision": 0.7701149425287356,
608
+ "recall": 0.8481012658227848
609
+ },
610
+ "test_default_0_5": {
611
+ "threshold": 0.5,
612
+ "accuracy": 0.9498031496062992,
613
+ "precision": 0.7479674796747967,
614
+ "recall": 0.8214285714285714,
615
+ "f1": 0.7829787234042553,
616
+ "confusion_matrix": [
617
+ [
618
+ 873,
619
+ 31
620
+ ],
621
+ [
622
+ 20,
623
+ 92
624
+ ]
625
+ ],
626
+ "classification_report": {
627
+ "NOT_RELEVANT": {
628
+ "precision": 0.9776035834266518,
629
+ "recall": 0.9657079646017699,
630
+ "f1-score": 0.9716193656093489,
631
+ "support": 904.0
632
+ },
633
+ "RELEVANT": {
634
+ "precision": 0.7479674796747967,
635
+ "recall": 0.8214285714285714,
636
+ "f1-score": 0.7829787234042553,
637
+ "support": 112.0
638
+ },
639
+ "accuracy": 0.9498031496062992,
640
+ "macro avg": {
641
+ "precision": 0.8627855315507242,
642
+ "recall": 0.8935682680151706,
643
+ "f1-score": 0.8772990445068021,
644
+ "support": 1016.0
645
+ },
646
+ "weighted avg": {
647
+ "precision": 0.9522893672650299,
648
+ "recall": 0.9498031496062992,
649
+ "f1-score": 0.9508243341851654,
650
+ "support": 1016.0
651
+ }
652
+ },
653
+ "roc_auc": 0.9511694058154235,
654
+ "average_precision": 0.7846734208461954
655
+ },
656
+ "test_optimal_threshold": {
657
+ "threshold": 0.6156440377235413,
658
+ "accuracy": 0.9498031496062992,
659
+ "precision": 0.7479674796747967,
660
+ "recall": 0.8214285714285714,
661
+ "f1": 0.7829787234042553,
662
+ "confusion_matrix": [
663
+ [
664
+ 873,
665
+ 31
666
+ ],
667
+ [
668
+ 20,
669
+ 92
670
+ ]
671
+ ],
672
+ "classification_report": {
673
+ "NOT_RELEVANT": {
674
+ "precision": 0.9776035834266518,
675
+ "recall": 0.9657079646017699,
676
+ "f1-score": 0.9716193656093489,
677
+ "support": 904.0
678
+ },
679
+ "RELEVANT": {
680
+ "precision": 0.7479674796747967,
681
+ "recall": 0.8214285714285714,
682
+ "f1-score": 0.7829787234042553,
683
+ "support": 112.0
684
+ },
685
+ "accuracy": 0.9498031496062992,
686
+ "macro avg": {
687
+ "precision": 0.8627855315507242,
688
+ "recall": 0.8935682680151706,
689
+ "f1-score": 0.8772990445068021,
690
+ "support": 1016.0
691
+ },
692
+ "weighted avg": {
693
+ "precision": 0.9522893672650299,
694
+ "recall": 0.9498031496062992,
695
+ "f1-score": 0.9508243341851654,
696
+ "support": 1016.0
697
+ }
698
+ },
699
+ "roc_auc": 0.9511694058154235,
700
+ "average_precision": 0.7846734208461954
701
+ }
702
+ }
703
+ ]
704
+ }
transformer/checkpoint-1220/config.json ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_cross_attention": false,
3
+ "architectures": [
4
+ "XLMRobertaForSequenceClassification"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.1,
7
+ "bos_token_id": 0,
8
+ "classifier_dropout": null,
9
+ "dtype": "float32",
10
+ "eos_token_id": 2,
11
+ "hidden_act": "gelu",
12
+ "hidden_dropout_prob": 0.1,
13
+ "hidden_size": 768,
14
+ "id2label": {
15
+ "0": "NOT_RELEVANT",
16
+ "1": "RELEVANT"
17
+ },
18
+ "initializer_range": 0.02,
19
+ "intermediate_size": 3072,
20
+ "is_decoder": false,
21
+ "label2id": {
22
+ "NOT_RELEVANT": 0,
23
+ "RELEVANT": 1
24
+ },
25
+ "layer_norm_eps": 1e-05,
26
+ "max_position_embeddings": 514,
27
+ "model_type": "xlm-roberta",
28
+ "num_attention_heads": 12,
29
+ "num_hidden_layers": 12,
30
+ "output_past": true,
31
+ "pad_token_id": 1,
32
+ "position_embedding_type": "absolute",
33
+ "problem_type": "single_label_classification",
34
+ "tie_word_embeddings": true,
35
+ "transformers_version": "5.9.0",
36
+ "type_vocab_size": 1,
37
+ "use_cache": false,
38
+ "vocab_size": 250002
39
+ }
transformer/checkpoint-1220/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:23260da79af693d20fe8414504a730a2e1fae0128c96e784d5a388b934f65ef8
3
+ size 1112205008
transformer/checkpoint-1220/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f5656e89640a42e9a162840684d7e143689ced913e28cbc8709b4e3fcc33ea94
3
+ size 2224532875
transformer/checkpoint-1220/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e47023fdf7fee85f2c66207ee2960719b8bf1b11c2d946d75e0d2fe33113c7ce
3
+ size 14645
transformer/checkpoint-1220/scaler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7ad54f7b6fbdb8393459d5a595aa5dfdf4cf4c483f044be07288464f573b4d8e
3
+ size 1383
transformer/checkpoint-1220/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f6adfd2a8e363fb5adf050a01658d698ef3da72d5e9b197063c5e3b6a0fe9333
3
+ size 1465
transformer/checkpoint-1220/tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cc02d42fb2a10276563109e2287cc0dbe6b595d5b3b3401c7cfeffc0b7e20270
3
+ size 17098351
transformer/checkpoint-1220/tokenizer_config.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_prefix_space": true,
3
+ "backend": "tokenizers",
4
+ "bos_token": "<s>",
5
+ "cls_token": "<s>",
6
+ "eos_token": "</s>",
7
+ "is_local": false,
8
+ "local_files_only": false,
9
+ "mask_token": "<mask>",
10
+ "model_max_length": 512,
11
+ "pad_token": "<pad>",
12
+ "sep_token": "</s>",
13
+ "tokenizer_class": "XLMRobertaTokenizer",
14
+ "unk_token": "<unk>"
15
+ }
transformer/checkpoint-1220/trainer_state.json ADDED
@@ -0,0 +1,431 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_global_step": 1220,
3
+ "best_metric": 0.8,
4
+ "best_model_checkpoint": "/content/agri-utilization-classifier/transformer/checkpoint-1220",
5
+ "epoch": 4.0,
6
+ "eval_steps": 500,
7
+ "global_step": 1220,
8
+ "is_hyper_param_search": false,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "epoch": 0.08196721311475409,
14
+ "grad_norm": 6.984184741973877,
15
+ "learning_rate": 3.157894736842105e-06,
16
+ "loss": 0.7167730712890625,
17
+ "step": 25
18
+ },
19
+ {
20
+ "epoch": 0.16393442622950818,
21
+ "grad_norm": 9.77598762512207,
22
+ "learning_rate": 6.447368421052632e-06,
23
+ "loss": 0.5636273193359375,
24
+ "step": 50
25
+ },
26
+ {
27
+ "epoch": 0.2459016393442623,
28
+ "grad_norm": 8.441609382629395,
29
+ "learning_rate": 9.736842105263159e-06,
30
+ "loss": 0.37406421661376954,
31
+ "step": 75
32
+ },
33
+ {
34
+ "epoch": 0.32786885245901637,
35
+ "grad_norm": 5.313694953918457,
36
+ "learning_rate": 1.3026315789473684e-05,
37
+ "loss": 0.2965927886962891,
38
+ "step": 100
39
+ },
40
+ {
41
+ "epoch": 0.4098360655737705,
42
+ "grad_norm": 7.240467548370361,
43
+ "learning_rate": 1.6315789473684213e-05,
44
+ "loss": 0.29742313385009767,
45
+ "step": 125
46
+ },
47
+ {
48
+ "epoch": 0.4918032786885246,
49
+ "grad_norm": 6.100603103637695,
50
+ "learning_rate": 1.960526315789474e-05,
51
+ "loss": 0.2068590545654297,
52
+ "step": 150
53
+ },
54
+ {
55
+ "epoch": 0.5737704918032787,
56
+ "grad_norm": 4.169040679931641,
57
+ "learning_rate": 1.9679533867443555e-05,
58
+ "loss": 0.21712726593017578,
59
+ "step": 175
60
+ },
61
+ {
62
+ "epoch": 0.6557377049180327,
63
+ "grad_norm": 4.665876865386963,
64
+ "learning_rate": 1.9315367807720323e-05,
65
+ "loss": 0.2889243125915527,
66
+ "step": 200
67
+ },
68
+ {
69
+ "epoch": 0.7377049180327869,
70
+ "grad_norm": 1.094870924949646,
71
+ "learning_rate": 1.8951201747997088e-05,
72
+ "loss": 0.2303921699523926,
73
+ "step": 225
74
+ },
75
+ {
76
+ "epoch": 0.819672131147541,
77
+ "grad_norm": 1.2164329290390015,
78
+ "learning_rate": 1.8587035688273852e-05,
79
+ "loss": 0.16723501205444335,
80
+ "step": 250
81
+ },
82
+ {
83
+ "epoch": 0.9016393442622951,
84
+ "grad_norm": 2.6314468383789062,
85
+ "learning_rate": 1.822286962855062e-05,
86
+ "loss": 0.15685997009277344,
87
+ "step": 275
88
+ },
89
+ {
90
+ "epoch": 0.9836065573770492,
91
+ "grad_norm": 14.927972793579102,
92
+ "learning_rate": 1.7858703568827385e-05,
93
+ "loss": 0.1979808807373047,
94
+ "step": 300
95
+ },
96
+ {
97
+ "epoch": 1.0,
98
+ "eval_accuracy": 0.9642126789366053,
99
+ "eval_f1": 0.7852760736196319,
100
+ "eval_loss": 0.10679091513156891,
101
+ "eval_precision": 0.7619047619047619,
102
+ "eval_recall": 0.810126582278481,
103
+ "eval_roc_auc": 0.9606665634108222,
104
+ "eval_runtime": 3.9998,
105
+ "eval_samples_per_second": 244.514,
106
+ "eval_steps_per_second": 7.75,
107
+ "step": 305
108
+ },
109
+ {
110
+ "epoch": 1.0655737704918034,
111
+ "grad_norm": 22.521757125854492,
112
+ "learning_rate": 1.7494537509104153e-05,
113
+ "loss": 0.1316550636291504,
114
+ "step": 325
115
+ },
116
+ {
117
+ "epoch": 1.1475409836065573,
118
+ "grad_norm": 0.22687062621116638,
119
+ "learning_rate": 1.7130371449380918e-05,
120
+ "loss": 0.2059168815612793,
121
+ "step": 350
122
+ },
123
+ {
124
+ "epoch": 1.2295081967213115,
125
+ "grad_norm": 0.657261312007904,
126
+ "learning_rate": 1.6766205389657686e-05,
127
+ "loss": 0.09930330276489258,
128
+ "step": 375
129
+ },
130
+ {
131
+ "epoch": 1.3114754098360657,
132
+ "grad_norm": 4.796896457672119,
133
+ "learning_rate": 1.640203932993445e-05,
134
+ "loss": 0.13251757621765137,
135
+ "step": 400
136
+ },
137
+ {
138
+ "epoch": 1.3934426229508197,
139
+ "grad_norm": 0.5394258499145508,
140
+ "learning_rate": 1.603787327021122e-05,
141
+ "loss": 0.17834033966064453,
142
+ "step": 425
143
+ },
144
+ {
145
+ "epoch": 1.4754098360655736,
146
+ "grad_norm": 0.4655781388282776,
147
+ "learning_rate": 1.5673707210487983e-05,
148
+ "loss": 0.122637300491333,
149
+ "step": 450
150
+ },
151
+ {
152
+ "epoch": 1.5573770491803278,
153
+ "grad_norm": 0.42849695682525635,
154
+ "learning_rate": 1.530954115076475e-05,
155
+ "loss": 0.15983641624450684,
156
+ "step": 475
157
+ },
158
+ {
159
+ "epoch": 1.639344262295082,
160
+ "grad_norm": 0.45386794209480286,
161
+ "learning_rate": 1.4945375091041516e-05,
162
+ "loss": 0.14264726638793945,
163
+ "step": 500
164
+ },
165
+ {
166
+ "epoch": 1.721311475409836,
167
+ "grad_norm": 0.6170782446861267,
168
+ "learning_rate": 1.4581209031318282e-05,
169
+ "loss": 0.18886091232299804,
170
+ "step": 525
171
+ },
172
+ {
173
+ "epoch": 1.8032786885245902,
174
+ "grad_norm": 5.4546122550964355,
175
+ "learning_rate": 1.4217042971595047e-05,
176
+ "loss": 0.14393989562988282,
177
+ "step": 550
178
+ },
179
+ {
180
+ "epoch": 1.8852459016393444,
181
+ "grad_norm": 17.419189453125,
182
+ "learning_rate": 1.3852876911871815e-05,
183
+ "loss": 0.10272212982177735,
184
+ "step": 575
185
+ },
186
+ {
187
+ "epoch": 1.9672131147540983,
188
+ "grad_norm": 1.6267497539520264,
189
+ "learning_rate": 1.3488710852148582e-05,
190
+ "loss": 0.1810975456237793,
191
+ "step": 600
192
+ },
193
+ {
194
+ "epoch": 2.0,
195
+ "eval_accuracy": 0.9580777096114519,
196
+ "eval_f1": 0.7759562841530054,
197
+ "eval_loss": 0.17126062512397766,
198
+ "eval_precision": 0.6826923076923077,
199
+ "eval_recall": 0.8987341772151899,
200
+ "eval_roc_auc": 0.9631939848777121,
201
+ "eval_runtime": 3.84,
202
+ "eval_samples_per_second": 254.687,
203
+ "eval_steps_per_second": 8.073,
204
+ "step": 610
205
+ },
206
+ {
207
+ "epoch": 2.0491803278688523,
208
+ "grad_norm": 1.3180276155471802,
209
+ "learning_rate": 1.3124544792425346e-05,
210
+ "loss": 0.05921304225921631,
211
+ "step": 625
212
+ },
213
+ {
214
+ "epoch": 2.1311475409836067,
215
+ "grad_norm": 5.675038814544678,
216
+ "learning_rate": 1.2760378732702113e-05,
217
+ "loss": 0.16824769973754883,
218
+ "step": 650
219
+ },
220
+ {
221
+ "epoch": 2.2131147540983607,
222
+ "grad_norm": 0.16993092000484467,
223
+ "learning_rate": 1.239621267297888e-05,
224
+ "loss": 0.12186273574829101,
225
+ "step": 675
226
+ },
227
+ {
228
+ "epoch": 2.2950819672131146,
229
+ "grad_norm": 1.1791695356369019,
230
+ "learning_rate": 1.2032046613255645e-05,
231
+ "loss": 0.08795836448669433,
232
+ "step": 700
233
+ },
234
+ {
235
+ "epoch": 2.3770491803278686,
236
+ "grad_norm": 0.07541065663099289,
237
+ "learning_rate": 1.1667880553532412e-05,
238
+ "loss": 0.10129087448120117,
239
+ "step": 725
240
+ },
241
+ {
242
+ "epoch": 2.459016393442623,
243
+ "grad_norm": 3.394912004470825,
244
+ "learning_rate": 1.1303714493809176e-05,
245
+ "loss": 0.14056243896484374,
246
+ "step": 750
247
+ },
248
+ {
249
+ "epoch": 2.540983606557377,
250
+ "grad_norm": 8.074258804321289,
251
+ "learning_rate": 1.0939548434085944e-05,
252
+ "loss": 0.06563093185424805,
253
+ "step": 775
254
+ },
255
+ {
256
+ "epoch": 2.6229508196721314,
257
+ "grad_norm": 12.472029685974121,
258
+ "learning_rate": 1.057538237436271e-05,
259
+ "loss": 0.09851057052612305,
260
+ "step": 800
261
+ },
262
+ {
263
+ "epoch": 2.7049180327868854,
264
+ "grad_norm": 0.10368915647268295,
265
+ "learning_rate": 1.0211216314639475e-05,
266
+ "loss": 0.11658324241638184,
267
+ "step": 825
268
+ },
269
+ {
270
+ "epoch": 2.7868852459016393,
271
+ "grad_norm": 44.263092041015625,
272
+ "learning_rate": 9.847050254916243e-06,
273
+ "loss": 0.13634946823120117,
274
+ "step": 850
275
+ },
276
+ {
277
+ "epoch": 2.8688524590163933,
278
+ "grad_norm": 0.07709958404302597,
279
+ "learning_rate": 9.482884195193008e-06,
280
+ "loss": 0.12144805908203125,
281
+ "step": 875
282
+ },
283
+ {
284
+ "epoch": 2.9508196721311473,
285
+ "grad_norm": 0.11255892366170883,
286
+ "learning_rate": 9.118718135469774e-06,
287
+ "loss": 0.11815821647644043,
288
+ "step": 900
289
+ },
290
+ {
291
+ "epoch": 3.0,
292
+ "eval_accuracy": 0.9601226993865031,
293
+ "eval_f1": 0.7868852459016393,
294
+ "eval_loss": 0.14711864292621613,
295
+ "eval_precision": 0.6923076923076923,
296
+ "eval_recall": 0.9113924050632911,
297
+ "eval_roc_auc": 0.9719449177003984,
298
+ "eval_runtime": 3.9043,
299
+ "eval_samples_per_second": 250.495,
300
+ "eval_steps_per_second": 7.94,
301
+ "step": 915
302
+ },
303
+ {
304
+ "epoch": 3.0327868852459017,
305
+ "grad_norm": 0.2808685302734375,
306
+ "learning_rate": 8.754552075746541e-06,
307
+ "loss": 0.1051255989074707,
308
+ "step": 925
309
+ },
310
+ {
311
+ "epoch": 3.1147540983606556,
312
+ "grad_norm": 0.07298991084098816,
313
+ "learning_rate": 8.390386016023307e-06,
314
+ "loss": 0.08817357063293457,
315
+ "step": 950
316
+ },
317
+ {
318
+ "epoch": 3.19672131147541,
319
+ "grad_norm": 0.049921419471502304,
320
+ "learning_rate": 8.026219956300074e-06,
321
+ "loss": 0.11110530853271484,
322
+ "step": 975
323
+ },
324
+ {
325
+ "epoch": 3.278688524590164,
326
+ "grad_norm": 1.874350905418396,
327
+ "learning_rate": 7.66205389657684e-06,
328
+ "loss": 0.09003183364868164,
329
+ "step": 1000
330
+ },
331
+ {
332
+ "epoch": 3.360655737704918,
333
+ "grad_norm": 0.09576287865638733,
334
+ "learning_rate": 7.2978878368536055e-06,
335
+ "loss": 0.05897871017456055,
336
+ "step": 1025
337
+ },
338
+ {
339
+ "epoch": 3.442622950819672,
340
+ "grad_norm": 20.84284019470215,
341
+ "learning_rate": 6.933721777130372e-06,
342
+ "loss": 0.06021720886230469,
343
+ "step": 1050
344
+ },
345
+ {
346
+ "epoch": 3.5245901639344264,
347
+ "grad_norm": 0.06452233344316483,
348
+ "learning_rate": 6.569555717407138e-06,
349
+ "loss": 0.06818977355957032,
350
+ "step": 1075
351
+ },
352
+ {
353
+ "epoch": 3.6065573770491803,
354
+ "grad_norm": 0.2655308246612549,
355
+ "learning_rate": 6.2053896576839045e-06,
356
+ "loss": 0.07051475524902344,
357
+ "step": 1100
358
+ },
359
+ {
360
+ "epoch": 3.6885245901639343,
361
+ "grad_norm": 0.05852988734841347,
362
+ "learning_rate": 5.84122359796067e-06,
363
+ "loss": 0.08089996337890625,
364
+ "step": 1125
365
+ },
366
+ {
367
+ "epoch": 3.7704918032786887,
368
+ "grad_norm": 13.798267364501953,
369
+ "learning_rate": 5.477057538237437e-06,
370
+ "loss": 0.0827936840057373,
371
+ "step": 1150
372
+ },
373
+ {
374
+ "epoch": 3.8524590163934427,
375
+ "grad_norm": 6.363399982452393,
376
+ "learning_rate": 5.112891478514203e-06,
377
+ "loss": 0.06787658214569092,
378
+ "step": 1175
379
+ },
380
+ {
381
+ "epoch": 3.9344262295081966,
382
+ "grad_norm": 0.059008605778217316,
383
+ "learning_rate": 4.748725418790969e-06,
384
+ "loss": 0.05120136260986328,
385
+ "step": 1200
386
+ },
387
+ {
388
+ "epoch": 4.0,
389
+ "eval_accuracy": 0.9662576687116564,
390
+ "eval_f1": 0.8,
391
+ "eval_loss": 0.1406129151582718,
392
+ "eval_precision": 0.7674418604651163,
393
+ "eval_recall": 0.8354430379746836,
394
+ "eval_roc_auc": 0.9636797566916827,
395
+ "eval_runtime": 3.9045,
396
+ "eval_samples_per_second": 250.478,
397
+ "eval_steps_per_second": 7.939,
398
+ "step": 1220
399
+ }
400
+ ],
401
+ "logging_steps": 25,
402
+ "max_steps": 1525,
403
+ "num_input_tokens_seen": 0,
404
+ "num_train_epochs": 5,
405
+ "save_steps": 500,
406
+ "stateful_callbacks": {
407
+ "EarlyStoppingCallback": {
408
+ "args": {
409
+ "early_stopping_patience": 2,
410
+ "early_stopping_threshold": 0.0
411
+ },
412
+ "attributes": {
413
+ "early_stopping_patience_counter": 0
414
+ }
415
+ },
416
+ "TrainerControl": {
417
+ "args": {
418
+ "should_epoch_stop": false,
419
+ "should_evaluate": false,
420
+ "should_log": false,
421
+ "should_save": true,
422
+ "should_training_stop": false
423
+ },
424
+ "attributes": {}
425
+ }
426
+ },
427
+ "total_flos": 2566385233981440.0,
428
+ "train_batch_size": 16,
429
+ "trial_name": null,
430
+ "trial_params": null
431
+ }
transformer/checkpoint-1220/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3c60366894b25ead0379e8d97e61f1123e1ad4786f5e41a8bc70f2d7bc8901f5
3
+ size 5329
transformer/checkpoint-1525/config.json ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_cross_attention": false,
3
+ "architectures": [
4
+ "XLMRobertaForSequenceClassification"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.1,
7
+ "bos_token_id": 0,
8
+ "classifier_dropout": null,
9
+ "dtype": "float32",
10
+ "eos_token_id": 2,
11
+ "hidden_act": "gelu",
12
+ "hidden_dropout_prob": 0.1,
13
+ "hidden_size": 768,
14
+ "id2label": {
15
+ "0": "NOT_RELEVANT",
16
+ "1": "RELEVANT"
17
+ },
18
+ "initializer_range": 0.02,
19
+ "intermediate_size": 3072,
20
+ "is_decoder": false,
21
+ "label2id": {
22
+ "NOT_RELEVANT": 0,
23
+ "RELEVANT": 1
24
+ },
25
+ "layer_norm_eps": 1e-05,
26
+ "max_position_embeddings": 514,
27
+ "model_type": "xlm-roberta",
28
+ "num_attention_heads": 12,
29
+ "num_hidden_layers": 12,
30
+ "output_past": true,
31
+ "pad_token_id": 1,
32
+ "position_embedding_type": "absolute",
33
+ "problem_type": "single_label_classification",
34
+ "tie_word_embeddings": true,
35
+ "transformers_version": "5.9.0",
36
+ "type_vocab_size": 1,
37
+ "use_cache": false,
38
+ "vocab_size": 250002
39
+ }
transformer/checkpoint-1525/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8f09e4daa3612b0a08afd0486aa5ce25a384c7e9c3abe05df4b6d1f68e5033b8
3
+ size 1112205008
transformer/checkpoint-1525/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a0ce2fdbe5bad58a74bc9f05881c3817b46567f474f1c364e65e73b116534a13
3
+ size 2224532875
transformer/checkpoint-1525/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:11cfcc5402da77a6739cb03bdfb1bcf72d52f973cae361ace637001e6cf3b966
3
+ size 14645
transformer/checkpoint-1525/scaler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cbcaa50b1076392191a1f8665b7b77d819d3cc73803b90b8509f590e4b16af02
3
+ size 1383
transformer/checkpoint-1525/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:93a1789096427e63a18497d8b158869d7f1d9fb6b44e74fef154d50a0e1bd1e0
3
+ size 1465
transformer/checkpoint-1525/tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cc02d42fb2a10276563109e2287cc0dbe6b595d5b3b3401c7cfeffc0b7e20270
3
+ size 17098351
transformer/checkpoint-1525/tokenizer_config.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_prefix_space": true,
3
+ "backend": "tokenizers",
4
+ "bos_token": "<s>",
5
+ "cls_token": "<s>",
6
+ "eos_token": "</s>",
7
+ "is_local": false,
8
+ "local_files_only": false,
9
+ "mask_token": "<mask>",
10
+ "model_max_length": 512,
11
+ "pad_token": "<pad>",
12
+ "sep_token": "</s>",
13
+ "tokenizer_class": "XLMRobertaTokenizer",
14
+ "unk_token": "<unk>"
15
+ }
transformer/checkpoint-1525/trainer_state.json ADDED
@@ -0,0 +1,535 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_global_step": 1525,
3
+ "best_metric": 0.8072289156626506,
4
+ "best_model_checkpoint": "/content/agri-utilization-classifier/transformer/checkpoint-1525",
5
+ "epoch": 5.0,
6
+ "eval_steps": 500,
7
+ "global_step": 1525,
8
+ "is_hyper_param_search": false,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "epoch": 0.08196721311475409,
14
+ "grad_norm": 6.984184741973877,
15
+ "learning_rate": 3.157894736842105e-06,
16
+ "loss": 0.7167730712890625,
17
+ "step": 25
18
+ },
19
+ {
20
+ "epoch": 0.16393442622950818,
21
+ "grad_norm": 9.77598762512207,
22
+ "learning_rate": 6.447368421052632e-06,
23
+ "loss": 0.5636273193359375,
24
+ "step": 50
25
+ },
26
+ {
27
+ "epoch": 0.2459016393442623,
28
+ "grad_norm": 8.441609382629395,
29
+ "learning_rate": 9.736842105263159e-06,
30
+ "loss": 0.37406421661376954,
31
+ "step": 75
32
+ },
33
+ {
34
+ "epoch": 0.32786885245901637,
35
+ "grad_norm": 5.313694953918457,
36
+ "learning_rate": 1.3026315789473684e-05,
37
+ "loss": 0.2965927886962891,
38
+ "step": 100
39
+ },
40
+ {
41
+ "epoch": 0.4098360655737705,
42
+ "grad_norm": 7.240467548370361,
43
+ "learning_rate": 1.6315789473684213e-05,
44
+ "loss": 0.29742313385009767,
45
+ "step": 125
46
+ },
47
+ {
48
+ "epoch": 0.4918032786885246,
49
+ "grad_norm": 6.100603103637695,
50
+ "learning_rate": 1.960526315789474e-05,
51
+ "loss": 0.2068590545654297,
52
+ "step": 150
53
+ },
54
+ {
55
+ "epoch": 0.5737704918032787,
56
+ "grad_norm": 4.169040679931641,
57
+ "learning_rate": 1.9679533867443555e-05,
58
+ "loss": 0.21712726593017578,
59
+ "step": 175
60
+ },
61
+ {
62
+ "epoch": 0.6557377049180327,
63
+ "grad_norm": 4.665876865386963,
64
+ "learning_rate": 1.9315367807720323e-05,
65
+ "loss": 0.2889243125915527,
66
+ "step": 200
67
+ },
68
+ {
69
+ "epoch": 0.7377049180327869,
70
+ "grad_norm": 1.094870924949646,
71
+ "learning_rate": 1.8951201747997088e-05,
72
+ "loss": 0.2303921699523926,
73
+ "step": 225
74
+ },
75
+ {
76
+ "epoch": 0.819672131147541,
77
+ "grad_norm": 1.2164329290390015,
78
+ "learning_rate": 1.8587035688273852e-05,
79
+ "loss": 0.16723501205444335,
80
+ "step": 250
81
+ },
82
+ {
83
+ "epoch": 0.9016393442622951,
84
+ "grad_norm": 2.6314468383789062,
85
+ "learning_rate": 1.822286962855062e-05,
86
+ "loss": 0.15685997009277344,
87
+ "step": 275
88
+ },
89
+ {
90
+ "epoch": 0.9836065573770492,
91
+ "grad_norm": 14.927972793579102,
92
+ "learning_rate": 1.7858703568827385e-05,
93
+ "loss": 0.1979808807373047,
94
+ "step": 300
95
+ },
96
+ {
97
+ "epoch": 1.0,
98
+ "eval_accuracy": 0.9642126789366053,
99
+ "eval_f1": 0.7852760736196319,
100
+ "eval_loss": 0.10679091513156891,
101
+ "eval_precision": 0.7619047619047619,
102
+ "eval_recall": 0.810126582278481,
103
+ "eval_roc_auc": 0.9606665634108222,
104
+ "eval_runtime": 3.9998,
105
+ "eval_samples_per_second": 244.514,
106
+ "eval_steps_per_second": 7.75,
107
+ "step": 305
108
+ },
109
+ {
110
+ "epoch": 1.0655737704918034,
111
+ "grad_norm": 22.521757125854492,
112
+ "learning_rate": 1.7494537509104153e-05,
113
+ "loss": 0.1316550636291504,
114
+ "step": 325
115
+ },
116
+ {
117
+ "epoch": 1.1475409836065573,
118
+ "grad_norm": 0.22687062621116638,
119
+ "learning_rate": 1.7130371449380918e-05,
120
+ "loss": 0.2059168815612793,
121
+ "step": 350
122
+ },
123
+ {
124
+ "epoch": 1.2295081967213115,
125
+ "grad_norm": 0.657261312007904,
126
+ "learning_rate": 1.6766205389657686e-05,
127
+ "loss": 0.09930330276489258,
128
+ "step": 375
129
+ },
130
+ {
131
+ "epoch": 1.3114754098360657,
132
+ "grad_norm": 4.796896457672119,
133
+ "learning_rate": 1.640203932993445e-05,
134
+ "loss": 0.13251757621765137,
135
+ "step": 400
136
+ },
137
+ {
138
+ "epoch": 1.3934426229508197,
139
+ "grad_norm": 0.5394258499145508,
140
+ "learning_rate": 1.603787327021122e-05,
141
+ "loss": 0.17834033966064453,
142
+ "step": 425
143
+ },
144
+ {
145
+ "epoch": 1.4754098360655736,
146
+ "grad_norm": 0.4655781388282776,
147
+ "learning_rate": 1.5673707210487983e-05,
148
+ "loss": 0.122637300491333,
149
+ "step": 450
150
+ },
151
+ {
152
+ "epoch": 1.5573770491803278,
153
+ "grad_norm": 0.42849695682525635,
154
+ "learning_rate": 1.530954115076475e-05,
155
+ "loss": 0.15983641624450684,
156
+ "step": 475
157
+ },
158
+ {
159
+ "epoch": 1.639344262295082,
160
+ "grad_norm": 0.45386794209480286,
161
+ "learning_rate": 1.4945375091041516e-05,
162
+ "loss": 0.14264726638793945,
163
+ "step": 500
164
+ },
165
+ {
166
+ "epoch": 1.721311475409836,
167
+ "grad_norm": 0.6170782446861267,
168
+ "learning_rate": 1.4581209031318282e-05,
169
+ "loss": 0.18886091232299804,
170
+ "step": 525
171
+ },
172
+ {
173
+ "epoch": 1.8032786885245902,
174
+ "grad_norm": 5.4546122550964355,
175
+ "learning_rate": 1.4217042971595047e-05,
176
+ "loss": 0.14393989562988282,
177
+ "step": 550
178
+ },
179
+ {
180
+ "epoch": 1.8852459016393444,
181
+ "grad_norm": 17.419189453125,
182
+ "learning_rate": 1.3852876911871815e-05,
183
+ "loss": 0.10272212982177735,
184
+ "step": 575
185
+ },
186
+ {
187
+ "epoch": 1.9672131147540983,
188
+ "grad_norm": 1.6267497539520264,
189
+ "learning_rate": 1.3488710852148582e-05,
190
+ "loss": 0.1810975456237793,
191
+ "step": 600
192
+ },
193
+ {
194
+ "epoch": 2.0,
195
+ "eval_accuracy": 0.9580777096114519,
196
+ "eval_f1": 0.7759562841530054,
197
+ "eval_loss": 0.17126062512397766,
198
+ "eval_precision": 0.6826923076923077,
199
+ "eval_recall": 0.8987341772151899,
200
+ "eval_roc_auc": 0.9631939848777121,
201
+ "eval_runtime": 3.84,
202
+ "eval_samples_per_second": 254.687,
203
+ "eval_steps_per_second": 8.073,
204
+ "step": 610
205
+ },
206
+ {
207
+ "epoch": 2.0491803278688523,
208
+ "grad_norm": 1.3180276155471802,
209
+ "learning_rate": 1.3124544792425346e-05,
210
+ "loss": 0.05921304225921631,
211
+ "step": 625
212
+ },
213
+ {
214
+ "epoch": 2.1311475409836067,
215
+ "grad_norm": 5.675038814544678,
216
+ "learning_rate": 1.2760378732702113e-05,
217
+ "loss": 0.16824769973754883,
218
+ "step": 650
219
+ },
220
+ {
221
+ "epoch": 2.2131147540983607,
222
+ "grad_norm": 0.16993092000484467,
223
+ "learning_rate": 1.239621267297888e-05,
224
+ "loss": 0.12186273574829101,
225
+ "step": 675
226
+ },
227
+ {
228
+ "epoch": 2.2950819672131146,
229
+ "grad_norm": 1.1791695356369019,
230
+ "learning_rate": 1.2032046613255645e-05,
231
+ "loss": 0.08795836448669433,
232
+ "step": 700
233
+ },
234
+ {
235
+ "epoch": 2.3770491803278686,
236
+ "grad_norm": 0.07541065663099289,
237
+ "learning_rate": 1.1667880553532412e-05,
238
+ "loss": 0.10129087448120117,
239
+ "step": 725
240
+ },
241
+ {
242
+ "epoch": 2.459016393442623,
243
+ "grad_norm": 3.394912004470825,
244
+ "learning_rate": 1.1303714493809176e-05,
245
+ "loss": 0.14056243896484374,
246
+ "step": 750
247
+ },
248
+ {
249
+ "epoch": 2.540983606557377,
250
+ "grad_norm": 8.074258804321289,
251
+ "learning_rate": 1.0939548434085944e-05,
252
+ "loss": 0.06563093185424805,
253
+ "step": 775
254
+ },
255
+ {
256
+ "epoch": 2.6229508196721314,
257
+ "grad_norm": 12.472029685974121,
258
+ "learning_rate": 1.057538237436271e-05,
259
+ "loss": 0.09851057052612305,
260
+ "step": 800
261
+ },
262
+ {
263
+ "epoch": 2.7049180327868854,
264
+ "grad_norm": 0.10368915647268295,
265
+ "learning_rate": 1.0211216314639475e-05,
266
+ "loss": 0.11658324241638184,
267
+ "step": 825
268
+ },
269
+ {
270
+ "epoch": 2.7868852459016393,
271
+ "grad_norm": 44.263092041015625,
272
+ "learning_rate": 9.847050254916243e-06,
273
+ "loss": 0.13634946823120117,
274
+ "step": 850
275
+ },
276
+ {
277
+ "epoch": 2.8688524590163933,
278
+ "grad_norm": 0.07709958404302597,
279
+ "learning_rate": 9.482884195193008e-06,
280
+ "loss": 0.12144805908203125,
281
+ "step": 875
282
+ },
283
+ {
284
+ "epoch": 2.9508196721311473,
285
+ "grad_norm": 0.11255892366170883,
286
+ "learning_rate": 9.118718135469774e-06,
287
+ "loss": 0.11815821647644043,
288
+ "step": 900
289
+ },
290
+ {
291
+ "epoch": 3.0,
292
+ "eval_accuracy": 0.9601226993865031,
293
+ "eval_f1": 0.7868852459016393,
294
+ "eval_loss": 0.14711864292621613,
295
+ "eval_precision": 0.6923076923076923,
296
+ "eval_recall": 0.9113924050632911,
297
+ "eval_roc_auc": 0.9719449177003984,
298
+ "eval_runtime": 3.9043,
299
+ "eval_samples_per_second": 250.495,
300
+ "eval_steps_per_second": 7.94,
301
+ "step": 915
302
+ },
303
+ {
304
+ "epoch": 3.0327868852459017,
305
+ "grad_norm": 0.2808685302734375,
306
+ "learning_rate": 8.754552075746541e-06,
307
+ "loss": 0.1051255989074707,
308
+ "step": 925
309
+ },
310
+ {
311
+ "epoch": 3.1147540983606556,
312
+ "grad_norm": 0.07298991084098816,
313
+ "learning_rate": 8.390386016023307e-06,
314
+ "loss": 0.08817357063293457,
315
+ "step": 950
316
+ },
317
+ {
318
+ "epoch": 3.19672131147541,
319
+ "grad_norm": 0.049921419471502304,
320
+ "learning_rate": 8.026219956300074e-06,
321
+ "loss": 0.11110530853271484,
322
+ "step": 975
323
+ },
324
+ {
325
+ "epoch": 3.278688524590164,
326
+ "grad_norm": 1.874350905418396,
327
+ "learning_rate": 7.66205389657684e-06,
328
+ "loss": 0.09003183364868164,
329
+ "step": 1000
330
+ },
331
+ {
332
+ "epoch": 3.360655737704918,
333
+ "grad_norm": 0.09576287865638733,
334
+ "learning_rate": 7.2978878368536055e-06,
335
+ "loss": 0.05897871017456055,
336
+ "step": 1025
337
+ },
338
+ {
339
+ "epoch": 3.442622950819672,
340
+ "grad_norm": 20.84284019470215,
341
+ "learning_rate": 6.933721777130372e-06,
342
+ "loss": 0.06021720886230469,
343
+ "step": 1050
344
+ },
345
+ {
346
+ "epoch": 3.5245901639344264,
347
+ "grad_norm": 0.06452233344316483,
348
+ "learning_rate": 6.569555717407138e-06,
349
+ "loss": 0.06818977355957032,
350
+ "step": 1075
351
+ },
352
+ {
353
+ "epoch": 3.6065573770491803,
354
+ "grad_norm": 0.2655308246612549,
355
+ "learning_rate": 6.2053896576839045e-06,
356
+ "loss": 0.07051475524902344,
357
+ "step": 1100
358
+ },
359
+ {
360
+ "epoch": 3.6885245901639343,
361
+ "grad_norm": 0.05852988734841347,
362
+ "learning_rate": 5.84122359796067e-06,
363
+ "loss": 0.08089996337890625,
364
+ "step": 1125
365
+ },
366
+ {
367
+ "epoch": 3.7704918032786887,
368
+ "grad_norm": 13.798267364501953,
369
+ "learning_rate": 5.477057538237437e-06,
370
+ "loss": 0.0827936840057373,
371
+ "step": 1150
372
+ },
373
+ {
374
+ "epoch": 3.8524590163934427,
375
+ "grad_norm": 6.363399982452393,
376
+ "learning_rate": 5.112891478514203e-06,
377
+ "loss": 0.06787658214569092,
378
+ "step": 1175
379
+ },
380
+ {
381
+ "epoch": 3.9344262295081966,
382
+ "grad_norm": 0.059008605778217316,
383
+ "learning_rate": 4.748725418790969e-06,
384
+ "loss": 0.05120136260986328,
385
+ "step": 1200
386
+ },
387
+ {
388
+ "epoch": 4.0,
389
+ "eval_accuracy": 0.9662576687116564,
390
+ "eval_f1": 0.8,
391
+ "eval_loss": 0.1406129151582718,
392
+ "eval_precision": 0.7674418604651163,
393
+ "eval_recall": 0.8354430379746836,
394
+ "eval_roc_auc": 0.9636797566916827,
395
+ "eval_runtime": 3.9045,
396
+ "eval_samples_per_second": 250.478,
397
+ "eval_steps_per_second": 7.939,
398
+ "step": 1220
399
+ },
400
+ {
401
+ "epoch": 4.016393442622951,
402
+ "grad_norm": 7.579391002655029,
403
+ "learning_rate": 4.3845593590677355e-06,
404
+ "loss": 0.07703603267669677,
405
+ "step": 1225
406
+ },
407
+ {
408
+ "epoch": 4.098360655737705,
409
+ "grad_norm": 2.102841377258301,
410
+ "learning_rate": 4.020393299344502e-06,
411
+ "loss": 0.032845423221588135,
412
+ "step": 1250
413
+ },
414
+ {
415
+ "epoch": 4.180327868852459,
416
+ "grad_norm": 0.07024268805980682,
417
+ "learning_rate": 3.656227239621268e-06,
418
+ "loss": 0.03449820995330811,
419
+ "step": 1275
420
+ },
421
+ {
422
+ "epoch": 4.262295081967213,
423
+ "grad_norm": 0.02977728098630905,
424
+ "learning_rate": 3.292061179898034e-06,
425
+ "loss": 0.06648642539978028,
426
+ "step": 1300
427
+ },
428
+ {
429
+ "epoch": 4.344262295081967,
430
+ "grad_norm": 0.02086547203361988,
431
+ "learning_rate": 2.9278951201748e-06,
432
+ "loss": 0.06277695178985596,
433
+ "step": 1325
434
+ },
435
+ {
436
+ "epoch": 4.426229508196721,
437
+ "grad_norm": 0.15774419903755188,
438
+ "learning_rate": 2.5637290604515665e-06,
439
+ "loss": 0.02380265951156616,
440
+ "step": 1350
441
+ },
442
+ {
443
+ "epoch": 4.508196721311475,
444
+ "grad_norm": 0.030775833874940872,
445
+ "learning_rate": 2.1995630007283324e-06,
446
+ "loss": 0.06929955959320068,
447
+ "step": 1375
448
+ },
449
+ {
450
+ "epoch": 4.590163934426229,
451
+ "grad_norm": 0.07542883604764938,
452
+ "learning_rate": 1.8353969410050983e-06,
453
+ "loss": 0.027865142822265626,
454
+ "step": 1400
455
+ },
456
+ {
457
+ "epoch": 4.672131147540983,
458
+ "grad_norm": 6.874780178070068,
459
+ "learning_rate": 1.4712308812818645e-06,
460
+ "loss": 0.0548116397857666,
461
+ "step": 1425
462
+ },
463
+ {
464
+ "epoch": 4.754098360655737,
465
+ "grad_norm": 37.22102355957031,
466
+ "learning_rate": 1.1070648215586309e-06,
467
+ "loss": 0.04357499122619629,
468
+ "step": 1450
469
+ },
470
+ {
471
+ "epoch": 4.836065573770492,
472
+ "grad_norm": 0.03843735158443451,
473
+ "learning_rate": 7.428987618353969e-07,
474
+ "loss": 0.03130460500717163,
475
+ "step": 1475
476
+ },
477
+ {
478
+ "epoch": 4.918032786885246,
479
+ "grad_norm": 0.028637070208787918,
480
+ "learning_rate": 3.787327021121632e-07,
481
+ "loss": 0.02157698631286621,
482
+ "step": 1500
483
+ },
484
+ {
485
+ "epoch": 5.0,
486
+ "grad_norm": 0.021514365449547768,
487
+ "learning_rate": 1.4566642388929353e-08,
488
+ "loss": 0.04632264614105225,
489
+ "step": 1525
490
+ },
491
+ {
492
+ "epoch": 5.0,
493
+ "eval_accuracy": 0.967280163599182,
494
+ "eval_f1": 0.8072289156626506,
495
+ "eval_loss": 0.15767407417297363,
496
+ "eval_precision": 0.7701149425287356,
497
+ "eval_recall": 0.8481012658227848,
498
+ "eval_roc_auc": 0.9619549147435266,
499
+ "eval_runtime": 3.8427,
500
+ "eval_samples_per_second": 254.511,
501
+ "eval_steps_per_second": 8.067,
502
+ "step": 1525
503
+ }
504
+ ],
505
+ "logging_steps": 25,
506
+ "max_steps": 1525,
507
+ "num_input_tokens_seen": 0,
508
+ "num_train_epochs": 5,
509
+ "save_steps": 500,
510
+ "stateful_callbacks": {
511
+ "EarlyStoppingCallback": {
512
+ "args": {
513
+ "early_stopping_patience": 2,
514
+ "early_stopping_threshold": 0.0
515
+ },
516
+ "attributes": {
517
+ "early_stopping_patience_counter": 0
518
+ }
519
+ },
520
+ "TrainerControl": {
521
+ "args": {
522
+ "should_epoch_stop": false,
523
+ "should_evaluate": false,
524
+ "should_log": false,
525
+ "should_save": true,
526
+ "should_training_stop": true
527
+ },
528
+ "attributes": {}
529
+ }
530
+ },
531
+ "total_flos": 3207981542476800.0,
532
+ "train_batch_size": 16,
533
+ "trial_name": null,
534
+ "trial_params": null
535
+ }
transformer/checkpoint-1525/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3c60366894b25ead0379e8d97e61f1123e1ad4786f5e41a8bc70f2d7bc8901f5
3
+ size 5329
transformer/checkpoint-305/config.json ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_cross_attention": false,
3
+ "architectures": [
4
+ "XLMRobertaForSequenceClassification"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.1,
7
+ "bos_token_id": 0,
8
+ "classifier_dropout": null,
9
+ "dtype": "float32",
10
+ "eos_token_id": 2,
11
+ "hidden_act": "gelu",
12
+ "hidden_dropout_prob": 0.1,
13
+ "hidden_size": 768,
14
+ "id2label": {
15
+ "0": "NOT_RELEVANT",
16
+ "1": "RELEVANT"
17
+ },
18
+ "initializer_range": 0.02,
19
+ "intermediate_size": 3072,
20
+ "is_decoder": false,
21
+ "label2id": {
22
+ "NOT_RELEVANT": 0,
23
+ "RELEVANT": 1
24
+ },
25
+ "layer_norm_eps": 1e-05,
26
+ "max_position_embeddings": 514,
27
+ "model_type": "xlm-roberta",
28
+ "num_attention_heads": 12,
29
+ "num_hidden_layers": 12,
30
+ "output_past": true,
31
+ "pad_token_id": 1,
32
+ "position_embedding_type": "absolute",
33
+ "problem_type": "single_label_classification",
34
+ "tie_word_embeddings": true,
35
+ "transformers_version": "5.9.0",
36
+ "type_vocab_size": 1,
37
+ "use_cache": false,
38
+ "vocab_size": 250002
39
+ }
transformer/checkpoint-305/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bb20e73a65d9daab2db9b645e0bd0608878ef7b24edb7ac88b5558863e0c5ecc
3
+ size 1112205008
transformer/checkpoint-305/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:48f419be6f85e4a9a74197d6249f1efe7127ac7668e97007ff458f4b5034cff4
3
+ size 2224532875
transformer/checkpoint-305/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:402debc68e858d5bda7b8fed48fe7886888add874790e6471baac63ac8bdc564
3
+ size 14645
transformer/checkpoint-305/scaler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9b479a091e6482cdb8085a5931fe45bae60997ab8939a20b371d61d6be6f7199
3
+ size 1383
transformer/checkpoint-305/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8dcb4d91eb3f8c20bc70fefd93bf7ba0111a76badc2faa81080aeb911ab9bd59
3
+ size 1465
transformer/checkpoint-305/tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cc02d42fb2a10276563109e2287cc0dbe6b595d5b3b3401c7cfeffc0b7e20270
3
+ size 17098351
transformer/checkpoint-305/tokenizer_config.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_prefix_space": true,
3
+ "backend": "tokenizers",
4
+ "bos_token": "<s>",
5
+ "cls_token": "<s>",
6
+ "eos_token": "</s>",
7
+ "is_local": false,
8
+ "local_files_only": false,
9
+ "mask_token": "<mask>",
10
+ "model_max_length": 512,
11
+ "pad_token": "<pad>",
12
+ "sep_token": "</s>",
13
+ "tokenizer_class": "XLMRobertaTokenizer",
14
+ "unk_token": "<unk>"
15
+ }
transformer/checkpoint-305/trainer_state.json ADDED
@@ -0,0 +1,140 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_global_step": 305,
3
+ "best_metric": 0.7852760736196319,
4
+ "best_model_checkpoint": "/content/agri-utilization-classifier/transformer/checkpoint-305",
5
+ "epoch": 1.0,
6
+ "eval_steps": 500,
7
+ "global_step": 305,
8
+ "is_hyper_param_search": false,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "epoch": 0.08196721311475409,
14
+ "grad_norm": 6.984184741973877,
15
+ "learning_rate": 3.157894736842105e-06,
16
+ "loss": 0.7167730712890625,
17
+ "step": 25
18
+ },
19
+ {
20
+ "epoch": 0.16393442622950818,
21
+ "grad_norm": 9.77598762512207,
22
+ "learning_rate": 6.447368421052632e-06,
23
+ "loss": 0.5636273193359375,
24
+ "step": 50
25
+ },
26
+ {
27
+ "epoch": 0.2459016393442623,
28
+ "grad_norm": 8.441609382629395,
29
+ "learning_rate": 9.736842105263159e-06,
30
+ "loss": 0.37406421661376954,
31
+ "step": 75
32
+ },
33
+ {
34
+ "epoch": 0.32786885245901637,
35
+ "grad_norm": 5.313694953918457,
36
+ "learning_rate": 1.3026315789473684e-05,
37
+ "loss": 0.2965927886962891,
38
+ "step": 100
39
+ },
40
+ {
41
+ "epoch": 0.4098360655737705,
42
+ "grad_norm": 7.240467548370361,
43
+ "learning_rate": 1.6315789473684213e-05,
44
+ "loss": 0.29742313385009767,
45
+ "step": 125
46
+ },
47
+ {
48
+ "epoch": 0.4918032786885246,
49
+ "grad_norm": 6.100603103637695,
50
+ "learning_rate": 1.960526315789474e-05,
51
+ "loss": 0.2068590545654297,
52
+ "step": 150
53
+ },
54
+ {
55
+ "epoch": 0.5737704918032787,
56
+ "grad_norm": 4.169040679931641,
57
+ "learning_rate": 1.9679533867443555e-05,
58
+ "loss": 0.21712726593017578,
59
+ "step": 175
60
+ },
61
+ {
62
+ "epoch": 0.6557377049180327,
63
+ "grad_norm": 4.665876865386963,
64
+ "learning_rate": 1.9315367807720323e-05,
65
+ "loss": 0.2889243125915527,
66
+ "step": 200
67
+ },
68
+ {
69
+ "epoch": 0.7377049180327869,
70
+ "grad_norm": 1.094870924949646,
71
+ "learning_rate": 1.8951201747997088e-05,
72
+ "loss": 0.2303921699523926,
73
+ "step": 225
74
+ },
75
+ {
76
+ "epoch": 0.819672131147541,
77
+ "grad_norm": 1.2164329290390015,
78
+ "learning_rate": 1.8587035688273852e-05,
79
+ "loss": 0.16723501205444335,
80
+ "step": 250
81
+ },
82
+ {
83
+ "epoch": 0.9016393442622951,
84
+ "grad_norm": 2.6314468383789062,
85
+ "learning_rate": 1.822286962855062e-05,
86
+ "loss": 0.15685997009277344,
87
+ "step": 275
88
+ },
89
+ {
90
+ "epoch": 0.9836065573770492,
91
+ "grad_norm": 14.927972793579102,
92
+ "learning_rate": 1.7858703568827385e-05,
93
+ "loss": 0.1979808807373047,
94
+ "step": 300
95
+ },
96
+ {
97
+ "epoch": 1.0,
98
+ "eval_accuracy": 0.9642126789366053,
99
+ "eval_f1": 0.7852760736196319,
100
+ "eval_loss": 0.10679091513156891,
101
+ "eval_precision": 0.7619047619047619,
102
+ "eval_recall": 0.810126582278481,
103
+ "eval_roc_auc": 0.9606665634108222,
104
+ "eval_runtime": 3.9998,
105
+ "eval_samples_per_second": 244.514,
106
+ "eval_steps_per_second": 7.75,
107
+ "step": 305
108
+ }
109
+ ],
110
+ "logging_steps": 25,
111
+ "max_steps": 1525,
112
+ "num_input_tokens_seen": 0,
113
+ "num_train_epochs": 5,
114
+ "save_steps": 500,
115
+ "stateful_callbacks": {
116
+ "EarlyStoppingCallback": {
117
+ "args": {
118
+ "early_stopping_patience": 2,
119
+ "early_stopping_threshold": 0.0
120
+ },
121
+ "attributes": {
122
+ "early_stopping_patience_counter": 0
123
+ }
124
+ },
125
+ "TrainerControl": {
126
+ "args": {
127
+ "should_epoch_stop": false,
128
+ "should_evaluate": false,
129
+ "should_log": false,
130
+ "should_save": true,
131
+ "should_training_stop": false
132
+ },
133
+ "attributes": {}
134
+ }
135
+ },
136
+ "total_flos": 641596308495360.0,
137
+ "train_batch_size": 16,
138
+ "trial_name": null,
139
+ "trial_params": null
140
+ }
transformer/checkpoint-305/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3c60366894b25ead0379e8d97e61f1123e1ad4786f5e41a8bc70f2d7bc8901f5
3
+ size 5329
transformer/checkpoint-610/config.json ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_cross_attention": false,
3
+ "architectures": [
4
+ "XLMRobertaForSequenceClassification"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.1,
7
+ "bos_token_id": 0,
8
+ "classifier_dropout": null,
9
+ "dtype": "float32",
10
+ "eos_token_id": 2,
11
+ "hidden_act": "gelu",
12
+ "hidden_dropout_prob": 0.1,
13
+ "hidden_size": 768,
14
+ "id2label": {
15
+ "0": "NOT_RELEVANT",
16
+ "1": "RELEVANT"
17
+ },
18
+ "initializer_range": 0.02,
19
+ "intermediate_size": 3072,
20
+ "is_decoder": false,
21
+ "label2id": {
22
+ "NOT_RELEVANT": 0,
23
+ "RELEVANT": 1
24
+ },
25
+ "layer_norm_eps": 1e-05,
26
+ "max_position_embeddings": 514,
27
+ "model_type": "xlm-roberta",
28
+ "num_attention_heads": 12,
29
+ "num_hidden_layers": 12,
30
+ "output_past": true,
31
+ "pad_token_id": 1,
32
+ "position_embedding_type": "absolute",
33
+ "problem_type": "single_label_classification",
34
+ "tie_word_embeddings": true,
35
+ "transformers_version": "5.9.0",
36
+ "type_vocab_size": 1,
37
+ "use_cache": false,
38
+ "vocab_size": 250002
39
+ }