lgsilvaesilva committed on
Commit
4c67f07
·
verified ·
1 Parent(s): 61c6633

Upload folder using huggingface_hub

Browse files
Files changed (36) hide show
  1. README.md +70 -51
  2. REPORT.md +39 -39
  3. baselines/embedding-lightgbm/embedding-lightgbm.joblib +2 -2
  4. baselines/embedding-lightgbm/test_predictions.csv +0 -0
  5. baselines/embedding-lightgbm/validation_predictions.csv +0 -0
  6. baselines/embedding-logistic/embedding-logistic.joblib +2 -2
  7. baselines/embedding-logistic/test_predictions.csv +0 -0
  8. baselines/embedding-logistic/validation_predictions.csv +0 -0
  9. baselines/embedding-svm/embedding-svm.joblib +2 -2
  10. baselines/embedding-svm/test_predictions.csv +0 -0
  11. baselines/embedding-svm/validation_predictions.csv +0 -0
  12. report.json +385 -385
  13. transformer/checkpoint-1220/model.safetensors +1 -1
  14. transformer/checkpoint-1220/optimizer.pt +1 -1
  15. transformer/checkpoint-1220/scaler.pt +1 -1
  16. transformer/checkpoint-1220/trainer_state.json +136 -136
  17. transformer/checkpoint-1525/model.safetensors +1 -1
  18. transformer/checkpoint-1525/optimizer.pt +1 -1
  19. transformer/checkpoint-1525/scaler.pt +1 -1
  20. transformer/checkpoint-1525/trainer_state.json +171 -171
  21. transformer/checkpoint-305/model.safetensors +1 -1
  22. transformer/checkpoint-305/optimizer.pt +1 -1
  23. transformer/checkpoint-305/scaler.pt +1 -1
  24. transformer/checkpoint-305/trainer_state.json +34 -34
  25. transformer/checkpoint-610/model.safetensors +1 -1
  26. transformer/checkpoint-610/optimizer.pt +1 -1
  27. transformer/checkpoint-610/scaler.pt +1 -1
  28. transformer/checkpoint-610/trainer_state.json +67 -67
  29. transformer/checkpoint-915/model.safetensors +1 -1
  30. transformer/checkpoint-915/optimizer.pt +1 -1
  31. transformer/checkpoint-915/scaler.pt +1 -1
  32. transformer/checkpoint-915/trainer_state.json +100 -100
  33. transformer/config.json +5 -5
  34. transformer/model.safetensors +1 -1
  35. transformer/test_predictions.csv +0 -0
  36. transformer/validation_predictions.csv +0 -0
README.md CHANGED
@@ -21,7 +21,7 @@ It includes the Transformer model, any configured TF-IDF or sentence-embedding b
21
  - Text column: `chunk_text`
22
  - Label column: `label`
23
  - Transformer: `FacebookAI/xlm-roberta-base`
24
- - Generated at: `2026-05-26T17:46:00.691870+00:00`
25
 
26
  ## Dataset Summary
27
 
@@ -41,14 +41,14 @@ Validation metrics document threshold selection and tuning behavior; test metric
41
  | logistic_tfidf | 0.608 | 0.942 | 0.696 | 0.494 | 0.578 | 0.872 | 0.594 |
42
  | xgboost_tfidf | 0.500 | 0.945 | 0.931 | 0.342 | 0.500 | 0.823 | 0.588 |
43
  | xgboost_tfidf | 0.177 | 0.934 | 0.592 | 0.570 | 0.581 | 0.823 | 0.588 |
44
- | embedding-logistic_sentence_embeddings | 0.500 | 0.916 | 0.490 | 0.911 | 0.637 | 0.956 | 0.749 |
45
- | embedding-logistic_sentence_embeddings | 0.616 | 0.946 | 0.612 | 0.899 | 0.728 | 0.956 | 0.749 |
46
- | embedding-svm_sentence_embeddings | 0.500 | 0.957 | 0.803 | 0.620 | 0.700 | 0.958 | 0.743 |
47
- | embedding-svm_sentence_embeddings | 0.276 | 0.952 | 0.667 | 0.810 | 0.731 | 0.958 | 0.743 |
48
- | embedding-lightgbm_sentence_embeddings | 0.500 | 0.948 | 0.700 | 0.620 | 0.658 | 0.952 | 0.778 |
49
- | embedding-lightgbm_sentence_embeddings | 0.052 | 0.953 | 0.670 | 0.823 | 0.739 | 0.952 | 0.778 |
50
- | transformer | 0.500 | 0.973 | 0.812 | 0.873 | 0.841 | 0.971 | 0.836 |
51
- | transformer | 0.500 | 0.974 | 0.814 | 0.886 | 0.848 | 0.971 | 0.836 |
52
 
53
  ## Threshold Comparison on Test Split
54
 
@@ -58,14 +58,14 @@ Validation metrics document threshold selection and tuning behavior; test metric
58
  | logistic_tfidf | 0.608 | 0.930 | 0.902 | 0.411 | 0.564 | 0.899 | 0.726 |
59
  | xgboost_tfidf | 0.500 | 0.924 | 1.000 | 0.312 | 0.476 | 0.892 | 0.692 |
60
  | xgboost_tfidf | 0.177 | 0.918 | 0.663 | 0.527 | 0.587 | 0.892 | 0.692 |
61
- | embedding-logistic_sentence_embeddings | 0.500 | 0.899 | 0.524 | 0.866 | 0.653 | 0.952 | 0.759 |
62
- | embedding-logistic_sentence_embeddings | 0.616 | 0.929 | 0.632 | 0.857 | 0.727 | 0.952 | 0.759 |
63
- | embedding-svm_sentence_embeddings | 0.500 | 0.941 | 0.771 | 0.661 | 0.712 | 0.952 | 0.743 |
64
- | embedding-svm_sentence_embeddings | 0.276 | 0.935 | 0.667 | 0.821 | 0.736 | 0.952 | 0.743 |
65
- | embedding-lightgbm_sentence_embeddings | 0.500 | 0.946 | 0.788 | 0.696 | 0.739 | 0.959 | 0.801 |
66
- | embedding-lightgbm_sentence_embeddings | 0.052 | 0.933 | 0.657 | 0.821 | 0.730 | 0.959 | 0.801 |
67
- | transformer | 0.500 | 0.945 | 0.750 | 0.750 | 0.750 | 0.954 | 0.773 |
68
- | transformer | 0.500 | 0.945 | 0.750 | 0.750 | 0.750 | 0.954 | 0.773 |
69
 
70
  ## Confusion Matrices on Test Split
71
 
@@ -103,67 +103,67 @@ Rows are true labels and columns are predicted labels.
103
 
104
  | True / Predicted | NOT_RELEVANT | RELEVANT |
105
  | --- | ---: | ---: |
106
- | NOT_RELEVANT | 816 | 88 |
107
- | RELEVANT | 15 | 97 |
108
 
109
- ### embedding-logistic_sentence_embeddings at threshold 0.616
110
 
111
  | True / Predicted | NOT_RELEVANT | RELEVANT |
112
  | --- | ---: | ---: |
113
- | NOT_RELEVANT | 848 | 56 |
114
- | RELEVANT | 16 | 96 |
115
 
116
  ### embedding-svm_sentence_embeddings at threshold 0.500
117
 
118
  | True / Predicted | NOT_RELEVANT | RELEVANT |
119
  | --- | ---: | ---: |
120
  | NOT_RELEVANT | 882 | 22 |
121
- | RELEVANT | 38 | 74 |
122
 
123
- ### embedding-svm_sentence_embeddings at threshold 0.276
124
 
125
  | True / Predicted | NOT_RELEVANT | RELEVANT |
126
  | --- | ---: | ---: |
127
- | NOT_RELEVANT | 858 | 46 |
128
- | RELEVANT | 20 | 92 |
129
 
130
  ### embedding-lightgbm_sentence_embeddings at threshold 0.500
131
 
132
  | True / Predicted | NOT_RELEVANT | RELEVANT |
133
  | --- | ---: | ---: |
134
- | NOT_RELEVANT | 883 | 21 |
135
- | RELEVANT | 34 | 78 |
136
 
137
- ### embedding-lightgbm_sentence_embeddings at threshold 0.052
138
 
139
  | True / Predicted | NOT_RELEVANT | RELEVANT |
140
  | --- | ---: | ---: |
141
- | NOT_RELEVANT | 856 | 48 |
142
  | RELEVANT | 20 | 92 |
143
 
144
  ### transformer at threshold 0.500
145
 
146
  | True / Predicted | NOT_RELEVANT | RELEVANT |
147
  | --- | ---: | ---: |
148
- | NOT_RELEVANT | 876 | 28 |
149
- | RELEVANT | 28 | 84 |
150
 
151
- ### transformer at threshold 0.500
152
 
153
  | True / Predicted | NOT_RELEVANT | RELEVANT |
154
  | --- | ---: | ---: |
155
- | NOT_RELEVANT | 876 | 28 |
156
- | RELEVANT | 28 | 84 |
157
 
158
 
159
  ## Validation-Tuned Thresholds
160
 
161
  - `logistic_tfidf`: threshold `0.608` (validation F1 `0.578`); test F1 change vs 0.5: `-0.077`.
162
  - `xgboost_tfidf`: threshold `0.177` (validation F1 `0.581`); test F1 change vs 0.5: `+0.111`.
163
- - `embedding-logistic_sentence_embeddings`: threshold `0.616` (validation F1 `0.728`); test F1 change vs 0.5: `+0.074`.
164
- - `embedding-svm_sentence_embeddings`: threshold `0.276` (validation F1 `0.731`); test F1 change vs 0.5: `+0.024`.
165
- - `embedding-lightgbm_sentence_embeddings`: threshold `0.052` (validation F1 `0.739`); test F1 change vs 0.5: `-0.009`.
166
- - `transformer`: threshold `0.500` (validation F1 `0.848`); test F1 change vs 0.5: `+0.000`.
167
 
168
  ## Artifacts
169
 
@@ -179,7 +179,7 @@ Rows are true labels and columns are predicted labels.
179
  Install the runtime dependencies:
180
 
181
  ```bash
182
- pip install transformers torch huggingface_hub pandas joblib scikit-learn xgboost sentence-transformers lightgbm
183
  ```
184
 
185
  ### Transformer
@@ -188,7 +188,7 @@ pip install transformers torch huggingface_hub pandas joblib scikit-learn xgboos
188
  import torch
189
  from transformers import AutoModelForSequenceClassification, AutoTokenizer
190
 
191
- MODEL_ID = "YOUR_USERNAME/YOUR_MODEL_REPO"
192
 
193
  texts = [
194
  "Rice export prices increased after new procurement rules were announced.",
@@ -225,7 +225,7 @@ import json
225
  import joblib
226
  from huggingface_hub import hf_hub_download
227
 
228
- MODEL_ID = "YOUR_USERNAME/YOUR_MODEL_REPO"
229
  BASELINE = "logistic"
230
 
231
  texts = [
@@ -266,10 +266,11 @@ Available embedding baseline names in this run: "embedding-logistic", "embedding
266
 
267
  ```python
268
  import joblib
 
269
  from huggingface_hub import hf_hub_download
270
- from sentence_transformers import SentenceTransformer
271
 
272
- MODEL_ID = "YOUR_USERNAME/YOUR_MODEL_REPO"
273
  BASELINE = "embedding-logistic"
274
 
275
  texts = [
@@ -283,13 +284,31 @@ model_path = hf_hub_download(
283
  filename=f"baselines/{BASELINE}/{BASELINE}.joblib",
284
  )
285
  artifact = joblib.load(model_path)
286
- embedding_model = SentenceTransformer(artifact["embedding_model_name"])
287
- embeddings = embedding_model.encode(
288
- texts,
289
- batch_size=artifact.get("embedding_batch_size", 64),
290
- convert_to_numpy=True,
291
- normalize_embeddings=artifact.get("normalize_embeddings", True),
292
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
293
  probabilities = artifact["classifier"].predict_proba(embeddings)[:, 1]
294
  threshold = artifact["validation_best_threshold"]["threshold"]
295
 
 
21
  - Text column: `chunk_text`
22
  - Label column: `label`
23
  - Transformer: `FacebookAI/xlm-roberta-base`
24
+ - Generated at: `2026-05-27T10:50:45.867038+00:00`
25
 
26
  ## Dataset Summary
27
 
 
41
  | logistic_tfidf | 0.608 | 0.942 | 0.696 | 0.494 | 0.578 | 0.872 | 0.594 |
42
  | xgboost_tfidf | 0.500 | 0.945 | 0.931 | 0.342 | 0.500 | 0.823 | 0.588 |
43
  | xgboost_tfidf | 0.177 | 0.934 | 0.592 | 0.570 | 0.581 | 0.823 | 0.588 |
44
+ | embedding-logistic_sentence_embeddings | 0.500 | 0.912 | 0.476 | 0.861 | 0.613 | 0.953 | 0.762 |
45
+ | embedding-logistic_sentence_embeddings | 0.722 | 0.957 | 0.703 | 0.810 | 0.753 | 0.953 | 0.762 |
46
+ | embedding-svm_sentence_embeddings | 0.500 | 0.955 | 0.807 | 0.582 | 0.676 | 0.952 | 0.754 |
47
+ | embedding-svm_sentence_embeddings | 0.310 | 0.957 | 0.713 | 0.785 | 0.747 | 0.952 | 0.754 |
48
+ | embedding-lightgbm_sentence_embeddings | 0.500 | 0.954 | 0.750 | 0.646 | 0.694 | 0.948 | 0.782 |
49
+ | embedding-lightgbm_sentence_embeddings | 0.042 | 0.952 | 0.670 | 0.797 | 0.728 | 0.948 | 0.782 |
50
+ | transformer | 0.500 | 0.970 | 0.798 | 0.848 | 0.822 | 0.966 | 0.854 |
51
+ | transformer | 0.471 | 0.971 | 0.800 | 0.861 | 0.829 | 0.966 | 0.854 |
52
 
53
  ## Threshold Comparison on Test Split
54
 
 
58
  | logistic_tfidf | 0.608 | 0.930 | 0.902 | 0.411 | 0.564 | 0.899 | 0.726 |
59
  | xgboost_tfidf | 0.500 | 0.924 | 1.000 | 0.312 | 0.476 | 0.892 | 0.692 |
60
  | xgboost_tfidf | 0.177 | 0.918 | 0.663 | 0.527 | 0.587 | 0.892 | 0.692 |
61
+ | embedding-logistic_sentence_embeddings | 0.500 | 0.891 | 0.503 | 0.884 | 0.641 | 0.955 | 0.710 |
62
+ | embedding-logistic_sentence_embeddings | 0.722 | 0.935 | 0.689 | 0.750 | 0.718 | 0.955 | 0.710 |
63
+ | embedding-svm_sentence_embeddings | 0.500 | 0.930 | 0.741 | 0.562 | 0.640 | 0.956 | 0.704 |
64
+ | embedding-svm_sentence_embeddings | 0.310 | 0.934 | 0.686 | 0.741 | 0.712 | 0.956 | 0.704 |
65
+ | embedding-lightgbm_sentence_embeddings | 0.500 | 0.937 | 0.740 | 0.661 | 0.698 | 0.960 | 0.791 |
66
+ | embedding-lightgbm_sentence_embeddings | 0.042 | 0.929 | 0.639 | 0.821 | 0.719 | 0.960 | 0.791 |
67
+ | transformer | 0.500 | 0.951 | 0.777 | 0.777 | 0.777 | 0.968 | 0.817 |
68
+ | transformer | 0.471 | 0.950 | 0.770 | 0.777 | 0.773 | 0.968 | 0.817 |
69
 
70
  ## Confusion Matrices on Test Split
71
 
 
103
 
104
  | True / Predicted | NOT_RELEVANT | RELEVANT |
105
  | --- | ---: | ---: |
106
+ | NOT_RELEVANT | 806 | 98 |
107
+ | RELEVANT | 13 | 99 |
108
 
109
+ ### embedding-logistic_sentence_embeddings at threshold 0.722
110
 
111
  | True / Predicted | NOT_RELEVANT | RELEVANT |
112
  | --- | ---: | ---: |
113
+ | NOT_RELEVANT | 866 | 38 |
114
+ | RELEVANT | 28 | 84 |
115
 
116
  ### embedding-svm_sentence_embeddings at threshold 0.500
117
 
118
  | True / Predicted | NOT_RELEVANT | RELEVANT |
119
  | --- | ---: | ---: |
120
  | NOT_RELEVANT | 882 | 22 |
121
+ | RELEVANT | 49 | 63 |
122
 
123
+ ### embedding-svm_sentence_embeddings at threshold 0.310
124
 
125
  | True / Predicted | NOT_RELEVANT | RELEVANT |
126
  | --- | ---: | ---: |
127
+ | NOT_RELEVANT | 866 | 38 |
128
+ | RELEVANT | 29 | 83 |
129
 
130
  ### embedding-lightgbm_sentence_embeddings at threshold 0.500
131
 
132
  | True / Predicted | NOT_RELEVANT | RELEVANT |
133
  | --- | ---: | ---: |
134
+ | NOT_RELEVANT | 878 | 26 |
135
+ | RELEVANT | 38 | 74 |
136
 
137
+ ### embedding-lightgbm_sentence_embeddings at threshold 0.042
138
 
139
  | True / Predicted | NOT_RELEVANT | RELEVANT |
140
  | --- | ---: | ---: |
141
+ | NOT_RELEVANT | 852 | 52 |
142
  | RELEVANT | 20 | 92 |
143
 
144
  ### transformer at threshold 0.500
145
 
146
  | True / Predicted | NOT_RELEVANT | RELEVANT |
147
  | --- | ---: | ---: |
148
+ | NOT_RELEVANT | 879 | 25 |
149
+ | RELEVANT | 25 | 87 |
150
 
151
+ ### transformer at threshold 0.471
152
 
153
  | True / Predicted | NOT_RELEVANT | RELEVANT |
154
  | --- | ---: | ---: |
155
+ | NOT_RELEVANT | 878 | 26 |
156
+ | RELEVANT | 25 | 87 |
157
 
158
 
159
  ## Validation-Tuned Thresholds
160
 
161
  - `logistic_tfidf`: threshold `0.608` (validation F1 `0.578`); test F1 change vs 0.5: `-0.077`.
162
  - `xgboost_tfidf`: threshold `0.177` (validation F1 `0.581`); test F1 change vs 0.5: `+0.111`.
163
+ - `embedding-logistic_sentence_embeddings`: threshold `0.722` (validation F1 `0.753`); test F1 change vs 0.5: `+0.077`.
164
+ - `embedding-svm_sentence_embeddings`: threshold `0.310` (validation F1 `0.747`); test F1 change vs 0.5: `+0.073`.
165
+ - `embedding-lightgbm_sentence_embeddings`: threshold `0.042` (validation F1 `0.728`); test F1 change vs 0.5: `+0.021`.
166
+ - `transformer`: threshold `0.471` (validation F1 `0.829`); test F1 change vs 0.5: `-0.003`.
167
 
168
  ## Artifacts
169
 
 
179
  Install the runtime dependencies:
180
 
181
  ```bash
182
+ pip install transformers torch huggingface_hub pandas joblib scikit-learn xgboost lightgbm
183
  ```
184
 
185
  ### Transformer
 
188
  import torch
189
  from transformers import AutoModelForSequenceClassification, AutoTokenizer
190
 
191
+ MODEL_ID = "faodl/agri-utilization-classifier"
192
 
193
  texts = [
194
  "Rice export prices increased after new procurement rules were announced.",
 
225
  import joblib
226
  from huggingface_hub import hf_hub_download
227
 
228
+ MODEL_ID = "faodl/agri-utilization-classifier"
229
  BASELINE = "logistic"
230
 
231
  texts = [
 
266
 
267
  ```python
268
  import joblib
269
+ import torch
270
  from huggingface_hub import hf_hub_download
271
+ from transformers import AutoModel, AutoTokenizer
272
 
273
+ MODEL_ID = "faodl/agri-utilization-classifier"
274
  BASELINE = "embedding-logistic"
275
 
276
  texts = [
 
284
  filename=f"baselines/{BASELINE}/{BASELINE}.joblib",
285
  )
286
  artifact = joblib.load(model_path)
287
+ tokenizer = AutoTokenizer.from_pretrained(artifact["embedding_model_name"])
288
+ encoder = AutoModel.from_pretrained(artifact["embedding_model_name"])
289
+ encoder.eval()
290
+
291
+ encoded_batches = []
292
+ batch_size = artifact.get("embedding_batch_size", 64)
293
+ for start in range(0, len(texts), batch_size):
294
+ batch_texts = texts[start : start + batch_size]
295
+ inputs = tokenizer(
296
+ batch_texts,
297
+ padding=True,
298
+ truncation=True,
299
+ max_length=artifact.get("embedding_max_length", 256),
300
+ return_tensors="pt",
301
+ )
302
+ with torch.no_grad():
303
+ outputs = encoder(**inputs)
304
+ token_embeddings = outputs.last_hidden_state
305
+ attention_mask = inputs["attention_mask"].unsqueeze(-1).to(token_embeddings.dtype)
306
+ embeddings = (token_embeddings * attention_mask).sum(dim=1)
307
+ embeddings = embeddings / attention_mask.sum(dim=1).clamp(min=1e-9)
308
+ if artifact.get("normalize_embeddings", True):
309
+ embeddings = torch.nn.functional.normalize(embeddings, p=2, dim=1)
310
+ encoded_batches.append(embeddings)
311
+ embeddings = torch.cat(encoded_batches).numpy()
312
  probabilities = artifact["classifier"].predict_proba(embeddings)[:, 1]
313
  threshold = artifact["validation_best_threshold"]["threshold"]
314
 
REPORT.md CHANGED
@@ -6,7 +6,7 @@
6
  - Text column: `chunk_text`
7
  - Label column: `label`
8
  - Transformer: `FacebookAI/xlm-roberta-base`
9
- - Generated at: `2026-05-26T17:46:00.691870+00:00`
10
 
11
  ## Dataset Summary
12
 
@@ -26,14 +26,14 @@ Validation metrics document threshold selection and tuning behavior; test metric
26
  | logistic_tfidf | 0.608 | 0.942 | 0.696 | 0.494 | 0.578 | 0.872 | 0.594 |
27
  | xgboost_tfidf | 0.500 | 0.945 | 0.931 | 0.342 | 0.500 | 0.823 | 0.588 |
28
  | xgboost_tfidf | 0.177 | 0.934 | 0.592 | 0.570 | 0.581 | 0.823 | 0.588 |
29
- | embedding-logistic_sentence_embeddings | 0.500 | 0.916 | 0.490 | 0.911 | 0.637 | 0.956 | 0.749 |
30
- | embedding-logistic_sentence_embeddings | 0.616 | 0.946 | 0.612 | 0.899 | 0.728 | 0.956 | 0.749 |
31
- | embedding-svm_sentence_embeddings | 0.500 | 0.957 | 0.803 | 0.620 | 0.700 | 0.958 | 0.743 |
32
- | embedding-svm_sentence_embeddings | 0.276 | 0.952 | 0.667 | 0.810 | 0.731 | 0.958 | 0.743 |
33
- | embedding-lightgbm_sentence_embeddings | 0.500 | 0.948 | 0.700 | 0.620 | 0.658 | 0.952 | 0.778 |
34
- | embedding-lightgbm_sentence_embeddings | 0.052 | 0.953 | 0.670 | 0.823 | 0.739 | 0.952 | 0.778 |
35
- | transformer | 0.500 | 0.973 | 0.812 | 0.873 | 0.841 | 0.971 | 0.836 |
36
- | transformer | 0.500 | 0.974 | 0.814 | 0.886 | 0.848 | 0.971 | 0.836 |
37
 
38
  ## Threshold Comparison on Test Split
39
 
@@ -43,14 +43,14 @@ Validation metrics document threshold selection and tuning behavior; test metric
43
  | logistic_tfidf | 0.608 | 0.930 | 0.902 | 0.411 | 0.564 | 0.899 | 0.726 |
44
  | xgboost_tfidf | 0.500 | 0.924 | 1.000 | 0.312 | 0.476 | 0.892 | 0.692 |
45
  | xgboost_tfidf | 0.177 | 0.918 | 0.663 | 0.527 | 0.587 | 0.892 | 0.692 |
46
- | embedding-logistic_sentence_embeddings | 0.500 | 0.899 | 0.524 | 0.866 | 0.653 | 0.952 | 0.759 |
47
- | embedding-logistic_sentence_embeddings | 0.616 | 0.929 | 0.632 | 0.857 | 0.727 | 0.952 | 0.759 |
48
- | embedding-svm_sentence_embeddings | 0.500 | 0.941 | 0.771 | 0.661 | 0.712 | 0.952 | 0.743 |
49
- | embedding-svm_sentence_embeddings | 0.276 | 0.935 | 0.667 | 0.821 | 0.736 | 0.952 | 0.743 |
50
- | embedding-lightgbm_sentence_embeddings | 0.500 | 0.946 | 0.788 | 0.696 | 0.739 | 0.959 | 0.801 |
51
- | embedding-lightgbm_sentence_embeddings | 0.052 | 0.933 | 0.657 | 0.821 | 0.730 | 0.959 | 0.801 |
52
- | transformer | 0.500 | 0.945 | 0.750 | 0.750 | 0.750 | 0.954 | 0.773 |
53
- | transformer | 0.500 | 0.945 | 0.750 | 0.750 | 0.750 | 0.954 | 0.773 |
54
 
55
  ## Confusion Matrices on Test Split
56
 
@@ -88,67 +88,67 @@ Rows are true labels and columns are predicted labels.
88
 
89
  | True / Predicted | NOT_RELEVANT | RELEVANT |
90
  | --- | ---: | ---: |
91
- | NOT_RELEVANT | 816 | 88 |
92
- | RELEVANT | 15 | 97 |
93
 
94
- ### embedding-logistic_sentence_embeddings at threshold 0.616
95
 
96
  | True / Predicted | NOT_RELEVANT | RELEVANT |
97
  | --- | ---: | ---: |
98
- | NOT_RELEVANT | 848 | 56 |
99
- | RELEVANT | 16 | 96 |
100
 
101
  ### embedding-svm_sentence_embeddings at threshold 0.500
102
 
103
  | True / Predicted | NOT_RELEVANT | RELEVANT |
104
  | --- | ---: | ---: |
105
  | NOT_RELEVANT | 882 | 22 |
106
- | RELEVANT | 38 | 74 |
107
 
108
- ### embedding-svm_sentence_embeddings at threshold 0.276
109
 
110
  | True / Predicted | NOT_RELEVANT | RELEVANT |
111
  | --- | ---: | ---: |
112
- | NOT_RELEVANT | 858 | 46 |
113
- | RELEVANT | 20 | 92 |
114
 
115
  ### embedding-lightgbm_sentence_embeddings at threshold 0.500
116
 
117
  | True / Predicted | NOT_RELEVANT | RELEVANT |
118
  | --- | ---: | ---: |
119
- | NOT_RELEVANT | 883 | 21 |
120
- | RELEVANT | 34 | 78 |
121
 
122
- ### embedding-lightgbm_sentence_embeddings at threshold 0.052
123
 
124
  | True / Predicted | NOT_RELEVANT | RELEVANT |
125
  | --- | ---: | ---: |
126
- | NOT_RELEVANT | 856 | 48 |
127
  | RELEVANT | 20 | 92 |
128
 
129
  ### transformer at threshold 0.500
130
 
131
  | True / Predicted | NOT_RELEVANT | RELEVANT |
132
  | --- | ---: | ---: |
133
- | NOT_RELEVANT | 876 | 28 |
134
- | RELEVANT | 28 | 84 |
135
 
136
- ### transformer at threshold 0.500
137
 
138
  | True / Predicted | NOT_RELEVANT | RELEVANT |
139
  | --- | ---: | ---: |
140
- | NOT_RELEVANT | 876 | 28 |
141
- | RELEVANT | 28 | 84 |
142
 
143
 
144
  ## Validation-Tuned Thresholds
145
 
146
  - `logistic_tfidf`: threshold `0.608` (validation F1 `0.578`); test F1 change vs 0.5: `-0.077`.
147
  - `xgboost_tfidf`: threshold `0.177` (validation F1 `0.581`); test F1 change vs 0.5: `+0.111`.
148
- - `embedding-logistic_sentence_embeddings`: threshold `0.616` (validation F1 `0.728`); test F1 change vs 0.5: `+0.074`.
149
- - `embedding-svm_sentence_embeddings`: threshold `0.276` (validation F1 `0.731`); test F1 change vs 0.5: `+0.024`.
150
- - `embedding-lightgbm_sentence_embeddings`: threshold `0.052` (validation F1 `0.739`); test F1 change vs 0.5: `-0.009`.
151
- - `transformer`: threshold `0.500` (validation F1 `0.848`); test F1 change vs 0.5: `+0.000`.
152
 
153
  ## Artifacts
154
 
 
6
  - Text column: `chunk_text`
7
  - Label column: `label`
8
  - Transformer: `FacebookAI/xlm-roberta-base`
9
+ - Generated at: `2026-05-27T10:50:45.867038+00:00`
10
 
11
  ## Dataset Summary
12
 
 
26
  | logistic_tfidf | 0.608 | 0.942 | 0.696 | 0.494 | 0.578 | 0.872 | 0.594 |
27
  | xgboost_tfidf | 0.500 | 0.945 | 0.931 | 0.342 | 0.500 | 0.823 | 0.588 |
28
  | xgboost_tfidf | 0.177 | 0.934 | 0.592 | 0.570 | 0.581 | 0.823 | 0.588 |
29
+ | embedding-logistic_sentence_embeddings | 0.500 | 0.912 | 0.476 | 0.861 | 0.613 | 0.953 | 0.762 |
30
+ | embedding-logistic_sentence_embeddings | 0.722 | 0.957 | 0.703 | 0.810 | 0.753 | 0.953 | 0.762 |
31
+ | embedding-svm_sentence_embeddings | 0.500 | 0.955 | 0.807 | 0.582 | 0.676 | 0.952 | 0.754 |
32
+ | embedding-svm_sentence_embeddings | 0.310 | 0.957 | 0.713 | 0.785 | 0.747 | 0.952 | 0.754 |
33
+ | embedding-lightgbm_sentence_embeddings | 0.500 | 0.954 | 0.750 | 0.646 | 0.694 | 0.948 | 0.782 |
34
+ | embedding-lightgbm_sentence_embeddings | 0.042 | 0.952 | 0.670 | 0.797 | 0.728 | 0.948 | 0.782 |
35
+ | transformer | 0.500 | 0.970 | 0.798 | 0.848 | 0.822 | 0.966 | 0.854 |
36
+ | transformer | 0.471 | 0.971 | 0.800 | 0.861 | 0.829 | 0.966 | 0.854 |
37
 
38
  ## Threshold Comparison on Test Split
39
 
 
43
  | logistic_tfidf | 0.608 | 0.930 | 0.902 | 0.411 | 0.564 | 0.899 | 0.726 |
44
  | xgboost_tfidf | 0.500 | 0.924 | 1.000 | 0.312 | 0.476 | 0.892 | 0.692 |
45
  | xgboost_tfidf | 0.177 | 0.918 | 0.663 | 0.527 | 0.587 | 0.892 | 0.692 |
46
+ | embedding-logistic_sentence_embeddings | 0.500 | 0.891 | 0.503 | 0.884 | 0.641 | 0.955 | 0.710 |
47
+ | embedding-logistic_sentence_embeddings | 0.722 | 0.935 | 0.689 | 0.750 | 0.718 | 0.955 | 0.710 |
48
+ | embedding-svm_sentence_embeddings | 0.500 | 0.930 | 0.741 | 0.562 | 0.640 | 0.956 | 0.704 |
49
+ | embedding-svm_sentence_embeddings | 0.310 | 0.934 | 0.686 | 0.741 | 0.712 | 0.956 | 0.704 |
50
+ | embedding-lightgbm_sentence_embeddings | 0.500 | 0.937 | 0.740 | 0.661 | 0.698 | 0.960 | 0.791 |
51
+ | embedding-lightgbm_sentence_embeddings | 0.042 | 0.929 | 0.639 | 0.821 | 0.719 | 0.960 | 0.791 |
52
+ | transformer | 0.500 | 0.951 | 0.777 | 0.777 | 0.777 | 0.968 | 0.817 |
53
+ | transformer | 0.471 | 0.950 | 0.770 | 0.777 | 0.773 | 0.968 | 0.817 |
54
 
55
  ## Confusion Matrices on Test Split
56
 
 
88
 
89
  | True / Predicted | NOT_RELEVANT | RELEVANT |
90
  | --- | ---: | ---: |
91
+ | NOT_RELEVANT | 806 | 98 |
92
+ | RELEVANT | 13 | 99 |
93
 
94
+ ### embedding-logistic_sentence_embeddings at threshold 0.722
95
 
96
  | True / Predicted | NOT_RELEVANT | RELEVANT |
97
  | --- | ---: | ---: |
98
+ | NOT_RELEVANT | 866 | 38 |
99
+ | RELEVANT | 28 | 84 |
100
 
101
  ### embedding-svm_sentence_embeddings at threshold 0.500
102
 
103
  | True / Predicted | NOT_RELEVANT | RELEVANT |
104
  | --- | ---: | ---: |
105
  | NOT_RELEVANT | 882 | 22 |
106
+ | RELEVANT | 49 | 63 |
107
 
108
+ ### embedding-svm_sentence_embeddings at threshold 0.310
109
 
110
  | True / Predicted | NOT_RELEVANT | RELEVANT |
111
  | --- | ---: | ---: |
112
+ | NOT_RELEVANT | 866 | 38 |
113
+ | RELEVANT | 29 | 83 |
114
 
115
  ### embedding-lightgbm_sentence_embeddings at threshold 0.500
116
 
117
  | True / Predicted | NOT_RELEVANT | RELEVANT |
118
  | --- | ---: | ---: |
119
+ | NOT_RELEVANT | 878 | 26 |
120
+ | RELEVANT | 38 | 74 |
121
 
122
+ ### embedding-lightgbm_sentence_embeddings at threshold 0.042
123
 
124
  | True / Predicted | NOT_RELEVANT | RELEVANT |
125
  | --- | ---: | ---: |
126
+ | NOT_RELEVANT | 852 | 52 |
127
  | RELEVANT | 20 | 92 |
128
 
129
  ### transformer at threshold 0.500
130
 
131
  | True / Predicted | NOT_RELEVANT | RELEVANT |
132
  | --- | ---: | ---: |
133
+ | NOT_RELEVANT | 879 | 25 |
134
+ | RELEVANT | 25 | 87 |
135
 
136
+ ### transformer at threshold 0.471
137
 
138
  | True / Predicted | NOT_RELEVANT | RELEVANT |
139
  | --- | ---: | ---: |
140
+ | NOT_RELEVANT | 878 | 26 |
141
+ | RELEVANT | 25 | 87 |
142
 
143
 
144
  ## Validation-Tuned Thresholds
145
 
146
  - `logistic_tfidf`: threshold `0.608` (validation F1 `0.578`); test F1 change vs 0.5: `-0.077`.
147
  - `xgboost_tfidf`: threshold `0.177` (validation F1 `0.581`); test F1 change vs 0.5: `+0.111`.
148
+ - `embedding-logistic_sentence_embeddings`: threshold `0.722` (validation F1 `0.753`); test F1 change vs 0.5: `+0.077`.
149
+ - `embedding-svm_sentence_embeddings`: threshold `0.310` (validation F1 `0.747`); test F1 change vs 0.5: `+0.073`.
150
+ - `embedding-lightgbm_sentence_embeddings`: threshold `0.042` (validation F1 `0.728`); test F1 change vs 0.5: `+0.021`.
151
+ - `transformer`: threshold `0.471` (validation F1 `0.829`); test F1 change vs 0.5: `-0.003`.
152
 
153
  ## Artifacts
154
 
baselines/embedding-lightgbm/embedding-lightgbm.joblib CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3a14be333902e726d49155cf98ec689843edfa4320b39724da54a187bea078e8
3
- size 1467460
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:02039c6ee8487042ae61343afc227ab7375bbfdb042e073232a995d2e4d57dd6
3
+ size 1467646
baselines/embedding-lightgbm/test_predictions.csv CHANGED
The diff for this file is too large to render. See raw diff
 
baselines/embedding-lightgbm/validation_predictions.csv CHANGED
The diff for this file is too large to render. See raw diff
 
baselines/embedding-logistic/embedding-logistic.joblib CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:596282c69402bd7479f4057afeaeeec5cc81d9c13bede61569f3be96207798f0
3
- size 4287
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:433846875da231d3a97fc0f6bfa5adc3a1c4edb548d9655dc98a07523b436207
3
+ size 4361
baselines/embedding-logistic/test_predictions.csv CHANGED
The diff for this file is too large to render. See raw diff
 
baselines/embedding-logistic/validation_predictions.csv CHANGED
The diff for this file is too large to render. See raw diff
 
baselines/embedding-svm/embedding-svm.joblib CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d4dcb68c9d78767b36ec44c943e7085a53ccbf4fc61e5568acaf2d3cf442f72e
3
- size 11696
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:df3e6eaec015a205089efe2457d89d2ecacdf1661b8607ad60905ef318adc5f4
3
+ size 11770
baselines/embedding-svm/test_predictions.csv CHANGED
The diff for this file is too large to render. See raw diff
 
baselines/embedding-svm/validation_predictions.csv CHANGED
The diff for this file is too large to render. See raw diff
 
report.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "created_at": "2026-05-26T17:46:00.691870+00:00",
3
  "config": {
4
  "hf_dataset": "faodl/amis-agri-utilization",
5
  "hf_subset": null,
@@ -38,8 +38,8 @@
38
  "embedding_batch_size": 64,
39
  "positive_label_name": "RELEVANT",
40
  "negative_label_name": "NOT_RELEVANT",
41
- "push_to_hub": false,
42
- "hub_model_id": null,
43
  "hub_private_repo": false
44
  },
45
  "dataset_summary": {
@@ -474,194 +474,194 @@
474
  "artifact_dir": "/content/agri-utilization-classifier/baselines/embedding-logistic",
475
  "artifact_file": "/content/agri-utilization-classifier/baselines/embedding-logistic/embedding-logistic.joblib",
476
  "validation_best_threshold": {
477
- "threshold": 0.616087721531811,
478
- "f1": 0.7282051282051282,
479
- "precision": 0.6120689655172413,
480
- "recall": 0.8987341772151899
481
  },
482
  "validation_default_0_5": {
483
  "threshold": 0.5,
484
- "accuracy": 0.9161554192229039,
485
- "precision": 0.4897959183673469,
486
- "recall": 0.9113924050632911,
487
- "f1": 0.6371681415929203,
488
  "confusion_matrix": [
489
  [
490
  824,
491
  75
492
  ],
493
  [
494
- 7,
495
- 72
496
  ]
497
  ],
498
  "classification_report": {
499
  "NOT_RELEVANT": {
500
- "precision": 0.9915764139590855,
501
  "recall": 0.9165739710789766,
502
- "f1-score": 0.9526011560693641,
503
  "support": 899.0
504
  },
505
  "RELEVANT": {
506
- "precision": 0.4897959183673469,
507
- "recall": 0.9113924050632911,
508
- "f1-score": 0.6371681415929203,
509
  "support": 79.0
510
  },
511
- "accuracy": 0.9161554192229039,
512
  "macro avg": {
513
- "precision": 0.7406861661632163,
514
- "recall": 0.9139831880711339,
515
- "f1-score": 0.7948846488311423,
516
  "support": 978.0
517
  },
518
  "weighted avg": {
519
- "precision": 0.9510440426382804,
520
- "recall": 0.9161554192229039,
521
- "f1-score": 0.9271213931413078,
522
  "support": 978.0
523
  }
524
  },
525
- "roc_auc": 0.9563227777699554,
526
- "average_precision": 0.7488532716951917
527
  },
528
  "validation_optimal_threshold": {
529
- "threshold": 0.616087721531811,
530
- "accuracy": 0.9458077709611452,
531
- "precision": 0.6120689655172413,
532
- "recall": 0.8987341772151899,
533
- "f1": 0.7282051282051282,
534
  "confusion_matrix": [
535
  [
536
- 854,
537
- 45
538
  ],
539
  [
540
- 8,
541
- 71
542
  ]
543
  ],
544
  "classification_report": {
545
  "NOT_RELEVANT": {
546
- "precision": 0.9907192575406032,
547
- "recall": 0.949944382647386,
548
- "f1-score": 0.9699034639409426,
549
  "support": 899.0
550
  },
551
  "RELEVANT": {
552
- "precision": 0.6120689655172413,
553
- "recall": 0.8987341772151899,
554
- "f1-score": 0.7282051282051282,
555
  "support": 79.0
556
  },
557
- "accuracy": 0.9458077709611452,
558
  "macro avg": {
559
- "precision": 0.8013941115289223,
560
- "recall": 0.924339279931288,
561
- "f1-score": 0.8490542960730354,
562
  "support": 978.0
563
  },
564
  "weighted avg": {
565
- "precision": 0.9601329865080412,
566
- "recall": 0.9458077709611452,
567
- "f1-score": 0.9503797742444914,
568
  "support": 978.0
569
  }
570
  },
571
- "roc_auc": 0.9563227777699554,
572
- "average_precision": 0.7488532716951917
573
  },
574
  "test_default_0_5": {
575
  "threshold": 0.5,
576
- "accuracy": 0.8986220472440944,
577
- "precision": 0.5243243243243243,
578
- "recall": 0.8660714285714286,
579
- "f1": 0.6531986531986532,
580
  "confusion_matrix": [
581
  [
582
- 816,
583
- 88
584
  ],
585
  [
586
- 15,
587
- 97
588
  ]
589
  ],
590
  "classification_report": {
591
  "NOT_RELEVANT": {
592
- "precision": 0.9819494584837545,
593
- "recall": 0.9026548672566371,
594
- "f1-score": 0.9406340057636887,
595
  "support": 904.0
596
  },
597
  "RELEVANT": {
598
- "precision": 0.5243243243243243,
599
- "recall": 0.8660714285714286,
600
- "f1-score": 0.6531986531986532,
601
  "support": 112.0
602
  },
603
- "accuracy": 0.8986220472440944,
604
  "macro avg": {
605
- "precision": 0.7531368914040394,
606
- "recall": 0.8843631479140328,
607
- "f1-score": 0.796916329481171,
608
  "support": 1016.0
609
  },
610
  "weighted avg": {
611
- "precision": 0.9315025933008252,
612
- "recall": 0.8986220472440944,
613
- "f1-score": 0.9089482188667557,
614
  "support": 1016.0
615
  }
616
  },
617
- "roc_auc": 0.9523842446270544,
618
- "average_precision": 0.7588349048416645
619
  },
620
  "test_optimal_threshold": {
621
- "threshold": 0.616087721531811,
622
- "accuracy": 0.9291338582677166,
623
- "precision": 0.631578947368421,
624
- "recall": 0.8571428571428571,
625
- "f1": 0.7272727272727273,
626
  "confusion_matrix": [
627
  [
628
- 848,
629
- 56
630
  ],
631
  [
632
- 16,
633
- 96
634
  ]
635
  ],
636
  "classification_report": {
637
  "NOT_RELEVANT": {
638
- "precision": 0.9814814814814815,
639
- "recall": 0.9380530973451328,
640
- "f1-score": 0.9592760180995475,
641
  "support": 904.0
642
  },
643
  "RELEVANT": {
644
- "precision": 0.631578947368421,
645
- "recall": 0.8571428571428571,
646
- "f1-score": 0.7272727272727273,
647
  "support": 112.0
648
  },
649
- "accuracy": 0.9291338582677166,
650
  "macro avg": {
651
- "precision": 0.8065302144249513,
652
- "recall": 0.8975979772439949,
653
- "f1-score": 0.8432743726861374,
654
  "support": 1016.0
655
  },
656
  "weighted avg": {
657
- "precision": 0.9429095485871283,
658
- "recall": 0.9291338582677166,
659
- "f1-score": 0.9337008521816303,
660
  "support": 1016.0
661
  }
662
  },
663
- "roc_auc": 0.9523842446270544,
664
- "average_precision": 0.7588349048416645
665
  }
666
  },
667
  {
@@ -671,194 +671,194 @@
671
  "artifact_dir": "/content/agri-utilization-classifier/baselines/embedding-svm",
672
  "artifact_file": "/content/agri-utilization-classifier/baselines/embedding-svm/embedding-svm.joblib",
673
  "validation_best_threshold": {
674
- "threshold": 0.27629376276966117,
675
- "f1": 0.7314285714285714,
676
- "precision": 0.6666666666666666,
677
- "recall": 0.810126582278481
678
  },
679
  "validation_default_0_5": {
680
  "threshold": 0.5,
681
- "accuracy": 0.9570552147239264,
682
- "precision": 0.8032786885245902,
683
- "recall": 0.620253164556962,
684
- "f1": 0.7,
685
  "confusion_matrix": [
686
  [
687
- 887,
688
- 12
689
  ],
690
  [
691
- 30,
692
- 49
693
  ]
694
  ],
695
  "classification_report": {
696
  "NOT_RELEVANT": {
697
- "precision": 0.9672846237731734,
698
- "recall": 0.9866518353726362,
699
- "f1-score": 0.9768722466960352,
700
  "support": 899.0
701
  },
702
  "RELEVANT": {
703
- "precision": 0.8032786885245902,
704
- "recall": 0.620253164556962,
705
- "f1-score": 0.7,
706
  "support": 79.0
707
  },
708
- "accuracy": 0.9570552147239264,
709
  "macro avg": {
710
- "precision": 0.8852816561488818,
711
- "recall": 0.8034524999647992,
712
- "f1-score": 0.8384361233480175,
713
  "support": 978.0
714
  },
715
  "weighted avg": {
716
- "precision": 0.9540367005782469,
717
- "recall": 0.9570552147239264,
718
- "f1-score": 0.9545073106132266,
719
  "support": 978.0
720
  }
721
  },
722
- "roc_auc": 0.9584911505047804,
723
- "average_precision": 0.7425325495012566
724
  },
725
  "validation_optimal_threshold": {
726
- "threshold": 0.27629376276966117,
727
- "accuracy": 0.9519427402862985,
728
- "precision": 0.6666666666666666,
729
- "recall": 0.810126582278481,
730
- "f1": 0.7314285714285714,
731
  "confusion_matrix": [
732
  [
733
- 867,
734
- 32
735
  ],
736
  [
737
- 15,
738
- 64
739
  ]
740
  ],
741
  "classification_report": {
742
  "NOT_RELEVANT": {
743
- "precision": 0.9829931972789115,
744
- "recall": 0.96440489432703,
745
- "f1-score": 0.9736103312745649,
746
  "support": 899.0
747
  },
748
  "RELEVANT": {
749
- "precision": 0.6666666666666666,
750
- "recall": 0.810126582278481,
751
- "f1-score": 0.7314285714285714,
752
  "support": 79.0
753
  },
754
- "accuracy": 0.9519427402862985,
755
  "macro avg": {
756
- "precision": 0.8248299319727891,
757
- "recall": 0.8872657383027556,
758
- "f1-score": 0.8525194513515681,
759
  "support": 978.0
760
  },
761
  "weighted avg": {
762
- "precision": 0.9574412587120738,
763
- "recall": 0.9519427402862985,
764
- "f1-score": 0.9540475919823016,
765
  "support": 978.0
766
  }
767
  },
768
- "roc_auc": 0.9584911505047804,
769
- "average_precision": 0.7425325495012566
770
  },
771
  "test_default_0_5": {
772
  "threshold": 0.5,
773
- "accuracy": 0.9409448818897638,
774
- "precision": 0.7708333333333334,
775
- "recall": 0.6607142857142857,
776
- "f1": 0.7115384615384616,
777
  "confusion_matrix": [
778
  [
779
  882,
780
  22
781
  ],
782
  [
783
- 38,
784
- 74
785
  ]
786
  ],
787
  "classification_report": {
788
  "NOT_RELEVANT": {
789
- "precision": 0.9586956521739131,
790
  "recall": 0.9756637168141593,
791
- "f1-score": 0.9671052631578947,
792
  "support": 904.0
793
  },
794
  "RELEVANT": {
795
- "precision": 0.7708333333333334,
796
- "recall": 0.6607142857142857,
797
- "f1-score": 0.7115384615384616,
798
  "support": 112.0
799
  },
800
- "accuracy": 0.9409448818897638,
801
  "macro avg": {
802
- "precision": 0.8647644927536232,
803
- "recall": 0.8181890012642226,
804
- "f1-score": 0.8393218623481782,
805
  "support": 1016.0
806
  },
807
  "weighted avg": {
808
- "precision": 0.9379864201757389,
809
- "recall": 0.9409448818897638,
810
- "f1-score": 0.9389325448691382,
811
  "support": 1016.0
812
  }
813
  },
814
- "roc_auc": 0.9517817635903919,
815
- "average_precision": 0.743247391124005
816
  },
817
  "test_optimal_threshold": {
818
- "threshold": 0.27629376276966117,
819
- "accuracy": 0.9350393700787402,
820
- "precision": 0.6666666666666666,
821
- "recall": 0.8214285714285714,
822
- "f1": 0.736,
823
  "confusion_matrix": [
824
  [
825
- 858,
826
- 46
827
  ],
828
  [
829
- 20,
830
- 92
831
  ]
832
  ],
833
  "classification_report": {
834
  "NOT_RELEVANT": {
835
- "precision": 0.9772209567198178,
836
- "recall": 0.9491150442477876,
837
- "f1-score": 0.9629629629629629,
838
  "support": 904.0
839
  },
840
  "RELEVANT": {
841
- "precision": 0.6666666666666666,
842
- "recall": 0.8214285714285714,
843
- "f1-score": 0.736,
844
  "support": 112.0
845
  },
846
- "accuracy": 0.9350393700787402,
847
  "macro avg": {
848
- "precision": 0.8219438116932423,
849
- "recall": 0.8852718078381795,
850
- "f1-score": 0.8494814814814815,
851
  "support": 1016.0
852
  },
853
  "weighted avg": {
854
- "precision": 0.9429866255328562,
855
- "recall": 0.9350393700787402,
856
- "f1-score": 0.9379434237386993,
857
  "support": 1016.0
858
  }
859
  },
860
- "roc_auc": 0.9517817635903919,
861
- "average_precision": 0.743247391124005
862
  }
863
  },
864
  {
@@ -868,159 +868,159 @@
868
  "artifact_dir": "/content/agri-utilization-classifier/baselines/embedding-lightgbm",
869
  "artifact_file": "/content/agri-utilization-classifier/baselines/embedding-lightgbm/embedding-lightgbm.joblib",
870
  "validation_best_threshold": {
871
- "threshold": 0.05244099185733503,
872
- "f1": 0.7386363636363636,
873
- "precision": 0.6701030927835051,
874
- "recall": 0.8227848101265823
875
  },
876
  "validation_default_0_5": {
877
  "threshold": 0.5,
878
- "accuracy": 0.9478527607361963,
879
- "precision": 0.7,
880
- "recall": 0.620253164556962,
881
- "f1": 0.6577181208053692,
882
  "confusion_matrix": [
883
  [
884
- 878,
885
- 21
886
  ],
887
  [
888
- 30,
889
- 49
890
  ]
891
  ],
892
  "classification_report": {
893
  "NOT_RELEVANT": {
894
- "precision": 0.9669603524229075,
895
- "recall": 0.9766407119021134,
896
- "f1-score": 0.9717764250138351,
897
  "support": 899.0
898
  },
899
  "RELEVANT": {
900
- "precision": 0.7,
901
- "recall": 0.620253164556962,
902
- "f1-score": 0.6577181208053692,
903
  "support": 79.0
904
  },
905
- "accuracy": 0.9478527607361963,
906
  "macro avg": {
907
- "precision": 0.8334801762114536,
908
- "recall": 0.7984469382295377,
909
- "f1-score": 0.8147472729096021,
910
  "support": 978.0
911
  },
912
  "weighted avg": {
913
- "precision": 0.9453960703764762,
914
- "recall": 0.9478527607361963,
915
- "f1-score": 0.9464077071892248,
916
  "support": 978.0
917
  }
918
  },
919
- "roc_auc": 0.952112755382211,
920
- "average_precision": 0.777786126005225
921
  },
922
  "validation_optimal_threshold": {
923
- "threshold": 0.05244099185733503,
924
- "accuracy": 0.9529652351738241,
925
- "precision": 0.6701030927835051,
926
- "recall": 0.8227848101265823,
927
- "f1": 0.7386363636363636,
928
  "confusion_matrix": [
929
  [
930
- 867,
931
- 32
932
  ],
933
  [
934
- 14,
935
- 65
936
  ]
937
  ],
938
  "classification_report": {
939
  "NOT_RELEVANT": {
940
- "precision": 0.9841089670828603,
941
- "recall": 0.96440489432703,
942
- "f1-score": 0.9741573033707865,
943
  "support": 899.0
944
  },
945
  "RELEVANT": {
946
- "precision": 0.6701030927835051,
947
- "recall": 0.8227848101265823,
948
- "f1-score": 0.7386363636363636,
949
  "support": 79.0
950
  },
951
- "accuracy": 0.9529652351738241,
952
  "macro avg": {
953
- "precision": 0.8271060299331827,
954
- "recall": 0.8935948522268062,
955
- "f1-score": 0.8563968335035751,
956
  "support": 978.0
957
  },
958
  "weighted avg": {
959
- "precision": 0.9587444843940576,
960
- "recall": 0.9529652351738241,
961
- "f1-score": 0.9551326057848771,
962
  "support": 978.0
963
  }
964
  },
965
- "roc_auc": 0.952112755382211,
966
- "average_precision": 0.777786126005225
967
  },
968
  "test_default_0_5": {
969
  "threshold": 0.5,
970
- "accuracy": 0.9458661417322834,
971
- "precision": 0.7878787878787878,
972
- "recall": 0.6964285714285714,
973
- "f1": 0.7393364928909952,
974
  "confusion_matrix": [
975
  [
976
- 883,
977
- 21
978
  ],
979
  [
980
- 34,
981
- 78
982
  ]
983
  ],
984
  "classification_report": {
985
  "NOT_RELEVANT": {
986
- "precision": 0.9629225736095965,
987
- "recall": 0.9767699115044248,
988
- "f1-score": 0.9697968149368479,
989
  "support": 904.0
990
  },
991
  "RELEVANT": {
992
- "precision": 0.7878787878787878,
993
- "recall": 0.6964285714285714,
994
- "f1-score": 0.7393364928909952,
995
  "support": 112.0
996
  },
997
- "accuracy": 0.9458661417322834,
998
  "macro avg": {
999
- "precision": 0.8754006807441922,
1000
- "recall": 0.8365992414664981,
1001
- "f1-score": 0.8545666539139216,
1002
  "support": 1016.0
1003
  },
1004
  "weighted avg": {
1005
- "precision": 0.9436264082534445,
1006
- "recall": 0.9458661417322834,
1007
- "f1-score": 0.9443917400656515,
1008
  "support": 1016.0
1009
  }
1010
  },
1011
- "roc_auc": 0.9585078223767383,
1012
- "average_precision": 0.8011064601086128
1013
  },
1014
  "test_optimal_threshold": {
1015
- "threshold": 0.05244099185733503,
1016
- "accuracy": 0.9330708661417323,
1017
- "precision": 0.6571428571428571,
1018
  "recall": 0.8214285714285714,
1019
- "f1": 0.7301587301587301,
1020
  "confusion_matrix": [
1021
  [
1022
- 856,
1023
- 48
1024
  ],
1025
  [
1026
  20,
@@ -1029,33 +1029,33 @@
1029
  ],
1030
  "classification_report": {
1031
  "NOT_RELEVANT": {
1032
- "precision": 0.9771689497716894,
1033
- "recall": 0.9469026548672567,
1034
- "f1-score": 0.9617977528089887,
1035
  "support": 904.0
1036
  },
1037
  "RELEVANT": {
1038
- "precision": 0.6571428571428571,
1039
  "recall": 0.8214285714285714,
1040
- "f1-score": 0.7301587301587301,
1041
  "support": 112.0
1042
  },
1043
- "accuracy": 0.9330708661417323,
1044
  "macro avg": {
1045
- "precision": 0.8171559034572733,
1046
- "recall": 0.8841656131479141,
1047
- "f1-score": 0.8459782414838595,
1048
  "support": 1016.0
1049
  },
1050
  "weighted avg": {
1051
- "precision": 0.9418904828677237,
1052
- "recall": 0.9330708661417323,
1053
- "f1-score": 0.936262742438094,
1054
  "support": 1016.0
1055
  }
1056
  },
1057
- "roc_auc": 0.9585078223767383,
1058
- "average_precision": 0.8011064601086128
1059
  }
1060
  },
1061
  {
@@ -1063,194 +1063,194 @@
1063
  "model_name": "FacebookAI/xlm-roberta-base",
1064
  "artifact_dir": "/content/agri-utilization-classifier/transformer",
1065
  "validation_best_threshold": {
1066
- "threshold": 0.4999122619628906,
1067
- "f1": 0.8484848484848485,
1068
- "precision": 0.813953488372093,
1069
- "recall": 0.8860759493670886
1070
  },
1071
  "validation_default_0_5": {
1072
  "threshold": 0.5,
1073
- "accuracy": 0.9734151329243353,
1074
- "precision": 0.8117647058823529,
1075
- "recall": 0.8734177215189873,
1076
- "f1": 0.8414634146341463,
1077
  "confusion_matrix": [
1078
  [
1079
- 883,
1080
- 16
1081
  ],
1082
  [
1083
- 10,
1084
- 69
1085
  ]
1086
  ],
1087
  "classification_report": {
1088
  "NOT_RELEVANT": {
1089
- "precision": 0.9888017917133258,
1090
- "recall": 0.982202447163515,
1091
- "f1-score": 0.9854910714285714,
1092
  "support": 899.0
1093
  },
1094
  "RELEVANT": {
1095
- "precision": 0.8117647058823529,
1096
- "recall": 0.8734177215189873,
1097
- "f1-score": 0.8414634146341463,
1098
  "support": 79.0
1099
  },
1100
- "accuracy": 0.9734151329243353,
1101
  "macro avg": {
1102
- "precision": 0.9002832487978394,
1103
- "recall": 0.9278100843412511,
1104
- "f1-score": 0.9134772430313589,
1105
  "support": 978.0
1106
  },
1107
  "weighted avg": {
1108
- "precision": 0.974501250015323,
1109
- "recall": 0.9734151329243353,
1110
- "f1-score": 0.9738569355525392,
1111
  "support": 978.0
1112
  }
1113
  },
1114
- "roc_auc": 0.9707692091071654,
1115
- "average_precision": 0.836048392061997
1116
  },
1117
  "validation_optimal_threshold": {
1118
- "threshold": 0.4999122619628906,
1119
- "accuracy": 0.9744376278118609,
1120
- "precision": 0.813953488372093,
1121
- "recall": 0.8860759493670886,
1122
- "f1": 0.8484848484848485,
1123
  "confusion_matrix": [
1124
  [
1125
- 883,
1126
- 16
1127
  ],
1128
  [
1129
- 9,
1130
- 70
1131
  ]
1132
  ],
1133
  "classification_report": {
1134
  "NOT_RELEVANT": {
1135
- "precision": 0.9899103139013453,
1136
- "recall": 0.982202447163515,
1137
- "f1-score": 0.9860413176996091,
1138
  "support": 899.0
1139
  },
1140
  "RELEVANT": {
1141
- "precision": 0.813953488372093,
1142
- "recall": 0.8860759493670886,
1143
- "f1-score": 0.8484848484848485,
1144
  "support": 79.0
1145
  },
1146
- "accuracy": 0.9744376278118609,
1147
  "macro avg": {
1148
- "precision": 0.9019319011367192,
1149
- "recall": 0.9341391982653018,
1150
- "f1-score": 0.9172630830922288,
1151
  "support": 978.0
1152
  },
1153
  "weighted avg": {
1154
- "precision": 0.9756970324935632,
1155
- "recall": 0.9744376278118609,
1156
- "f1-score": 0.9749299055646745,
1157
  "support": 978.0
1158
  }
1159
  },
1160
- "roc_auc": 0.9707692091071654,
1161
- "average_precision": 0.836048392061997
1162
  },
1163
  "test_default_0_5": {
1164
  "threshold": 0.5,
1165
- "accuracy": 0.9448818897637795,
1166
- "precision": 0.75,
1167
- "recall": 0.75,
1168
- "f1": 0.75,
1169
  "confusion_matrix": [
1170
  [
1171
- 876,
1172
- 28
1173
  ],
1174
  [
1175
- 28,
1176
- 84
1177
  ]
1178
  ],
1179
  "classification_report": {
1180
  "NOT_RELEVANT": {
1181
- "precision": 0.9690265486725663,
1182
- "recall": 0.9690265486725663,
1183
- "f1-score": 0.9690265486725663,
1184
  "support": 904.0
1185
  },
1186
  "RELEVANT": {
1187
- "precision": 0.75,
1188
- "recall": 0.75,
1189
- "f1-score": 0.75,
1190
  "support": 112.0
1191
  },
1192
- "accuracy": 0.9448818897637795,
1193
  "macro avg": {
1194
- "precision": 0.8595132743362832,
1195
- "recall": 0.8595132743362832,
1196
- "f1-score": 0.8595132743362832,
1197
  "support": 1016.0
1198
  },
1199
  "weighted avg": {
1200
- "precision": 0.9448818897637795,
1201
- "recall": 0.9448818897637795,
1202
- "f1-score": 0.9448818897637795,
1203
  "support": 1016.0
1204
  }
1205
  },
1206
- "roc_auc": 0.9541373656763591,
1207
- "average_precision": 0.7726393350690168
1208
  },
1209
  "test_optimal_threshold": {
1210
- "threshold": 0.4999122619628906,
1211
- "accuracy": 0.9448818897637795,
1212
- "precision": 0.75,
1213
- "recall": 0.75,
1214
- "f1": 0.75,
1215
  "confusion_matrix": [
1216
  [
1217
- 876,
1218
- 28
1219
  ],
1220
  [
1221
- 28,
1222
- 84
1223
  ]
1224
  ],
1225
  "classification_report": {
1226
  "NOT_RELEVANT": {
1227
- "precision": 0.9690265486725663,
1228
- "recall": 0.9690265486725663,
1229
- "f1-score": 0.9690265486725663,
1230
  "support": 904.0
1231
  },
1232
  "RELEVANT": {
1233
- "precision": 0.75,
1234
- "recall": 0.75,
1235
- "f1-score": 0.75,
1236
  "support": 112.0
1237
  },
1238
- "accuracy": 0.9448818897637795,
1239
  "macro avg": {
1240
- "precision": 0.8595132743362832,
1241
- "recall": 0.8595132743362832,
1242
- "f1-score": 0.8595132743362832,
1243
  "support": 1016.0
1244
  },
1245
  "weighted avg": {
1246
- "precision": 0.9448818897637795,
1247
- "recall": 0.9448818897637795,
1248
- "f1-score": 0.9448818897637795,
1249
  "support": 1016.0
1250
  }
1251
  },
1252
- "roc_auc": 0.9541373656763591,
1253
- "average_precision": 0.7726393350690168
1254
  }
1255
  }
1256
  ]
 
1
  {
2
+ "created_at": "2026-05-27T10:50:45.867038+00:00",
3
  "config": {
4
  "hf_dataset": "faodl/amis-agri-utilization",
5
  "hf_subset": null,
 
38
  "embedding_batch_size": 64,
39
  "positive_label_name": "RELEVANT",
40
  "negative_label_name": "NOT_RELEVANT",
41
+ "push_to_hub": true,
42
+ "hub_model_id": "faodl/agri-utilization-classifier",
43
  "hub_private_repo": false
44
  },
45
  "dataset_summary": {
 
474
  "artifact_dir": "/content/agri-utilization-classifier/baselines/embedding-logistic",
475
  "artifact_file": "/content/agri-utilization-classifier/baselines/embedding-logistic/embedding-logistic.joblib",
476
  "validation_best_threshold": {
477
+ "threshold": 0.7220406191151401,
478
+ "f1": 0.7529411764705883,
479
+ "precision": 0.7032967032967034,
480
+ "recall": 0.810126582278481
481
  },
482
  "validation_default_0_5": {
483
  "threshold": 0.5,
484
+ "accuracy": 0.9120654396728016,
485
+ "precision": 0.4755244755244755,
486
+ "recall": 0.8607594936708861,
487
+ "f1": 0.6126126126126126,
488
  "confusion_matrix": [
489
  [
490
  824,
491
  75
492
  ],
493
  [
494
+ 11,
495
+ 68
496
  ]
497
  ],
498
  "classification_report": {
499
  "NOT_RELEVANT": {
500
+ "precision": 0.9868263473053892,
501
  "recall": 0.9165739710789766,
502
+ "f1-score": 0.9504036908881199,
503
  "support": 899.0
504
  },
505
  "RELEVANT": {
506
+ "precision": 0.4755244755244755,
507
+ "recall": 0.8607594936708861,
508
+ "f1-score": 0.6126126126126126,
509
  "support": 79.0
510
  },
511
+ "accuracy": 0.9120654396728016,
512
  "macro avg": {
513
+ "precision": 0.7311754114149324,
514
+ "recall": 0.8886667323749313,
515
+ "f1-score": 0.7815081517503663,
516
  "support": 978.0
517
  },
518
  "weighted avg": {
519
+ "precision": 0.9455248668650087,
520
+ "recall": 0.9120654396728016,
521
+ "f1-score": 0.9231179084916321,
522
  "support": 978.0
523
  }
524
  },
525
+ "roc_auc": 0.9525633263400967,
526
+ "average_precision": 0.7622834015915168
527
  },
528
  "validation_optimal_threshold": {
529
+ "threshold": 0.7220406191151401,
530
+ "accuracy": 0.9570552147239264,
531
+ "precision": 0.7032967032967034,
532
+ "recall": 0.810126582278481,
533
+ "f1": 0.7529411764705882,
534
  "confusion_matrix": [
535
  [
536
+ 872,
537
+ 27
538
  ],
539
  [
540
+ 15,
541
+ 64
542
  ]
543
  ],
544
  "classification_report": {
545
  "NOT_RELEVANT": {
546
+ "precision": 0.9830890642615558,
547
+ "recall": 0.9699666295884316,
548
+ "f1-score": 0.9764837625979843,
549
  "support": 899.0
550
  },
551
  "RELEVANT": {
552
+ "precision": 0.7032967032967034,
553
+ "recall": 0.810126582278481,
554
+ "f1-score": 0.7529411764705882,
555
  "support": 79.0
556
  },
557
+ "accuracy": 0.9570552147239264,
558
  "macro avg": {
559
+ "precision": 0.8431928837791296,
560
+ "recall": 0.8900466059334563,
561
+ "f1-score": 0.8647124695342863,
562
  "support": 978.0
563
  },
564
  "weighted avg": {
565
+ "precision": 0.9604882498277897,
566
+ "recall": 0.9570552147239264,
567
+ "f1-score": 0.9584266416326834,
568
  "support": 978.0
569
  }
570
  },
571
+ "roc_auc": 0.9525633263400967,
572
+ "average_precision": 0.7622834015915168
573
  },
574
  "test_default_0_5": {
575
  "threshold": 0.5,
576
+ "accuracy": 0.890748031496063,
577
+ "precision": 0.5025380710659898,
578
+ "recall": 0.8839285714285714,
579
+ "f1": 0.6407766990291263,
580
  "confusion_matrix": [
581
  [
582
+ 806,
583
+ 98
584
  ],
585
  [
586
+ 13,
587
+ 99
588
  ]
589
  ],
590
  "classification_report": {
591
  "NOT_RELEVANT": {
592
+ "precision": 0.9841269841269841,
593
+ "recall": 0.8915929203539823,
594
+ "f1-score": 0.9355774811375508,
595
  "support": 904.0
596
  },
597
  "RELEVANT": {
598
+ "precision": 0.5025380710659898,
599
+ "recall": 0.8839285714285714,
600
+ "f1-score": 0.6407766990291263,
601
  "support": 112.0
602
  },
603
+ "accuracy": 0.890748031496063,
604
  "macro avg": {
605
+ "precision": 0.7433325275964869,
606
+ "recall": 0.8877607458912768,
607
+ "f1-score": 0.7881770900833385,
608
  "support": 1016.0
609
  },
610
  "weighted avg": {
611
+ "precision": 0.9310384425297091,
612
+ "recall": 0.890748031496063,
613
+ "f1-score": 0.9030797571255984,
614
  "support": 1016.0
615
  }
616
  },
617
+ "roc_auc": 0.955317635903919,
618
+ "average_precision": 0.7096184898069098
619
  },
620
  "test_optimal_threshold": {
621
+ "threshold": 0.7220406191151401,
622
+ "accuracy": 0.9350393700787402,
623
+ "precision": 0.6885245901639344,
624
+ "recall": 0.75,
625
+ "f1": 0.717948717948718,
626
  "confusion_matrix": [
627
  [
628
+ 866,
629
+ 38
630
  ],
631
  [
632
+ 28,
633
+ 84
634
  ]
635
  ],
636
  "classification_report": {
637
  "NOT_RELEVANT": {
638
+ "precision": 0.9686800894854586,
639
+ "recall": 0.9579646017699115,
640
+ "f1-score": 0.9632925472747497,
641
  "support": 904.0
642
  },
643
  "RELEVANT": {
644
+ "precision": 0.6885245901639344,
645
+ "recall": 0.75,
646
+ "f1-score": 0.717948717948718,
647
  "support": 112.0
648
  },
649
+ "accuracy": 0.9350393700787402,
650
  "macro avg": {
651
+ "precision": 0.8286023398246964,
652
+ "recall": 0.8539823008849557,
653
+ "f1-score": 0.8406206326117338,
654
  "support": 1016.0
655
  },
656
  "weighted avg": {
657
+ "precision": 0.9377968060956843,
658
+ "recall": 0.9350393700787402,
659
+ "f1-score": 0.9362467708136123,
660
  "support": 1016.0
661
  }
662
  },
663
+ "roc_auc": 0.955317635903919,
664
+ "average_precision": 0.7096184898069098
665
  }
666
  },
667
  {
 
671
  "artifact_dir": "/content/agri-utilization-classifier/baselines/embedding-svm",
672
  "artifact_file": "/content/agri-utilization-classifier/baselines/embedding-svm/embedding-svm.joblib",
673
  "validation_best_threshold": {
674
+ "threshold": 0.30975184413575924,
675
+ "f1": 0.746987951807229,
676
+ "precision": 0.7126436781609196,
677
+ "recall": 0.7848101265822784
678
  },
679
  "validation_default_0_5": {
680
  "threshold": 0.5,
681
+ "accuracy": 0.9550102249488752,
682
+ "precision": 0.8070175438596491,
683
+ "recall": 0.5822784810126582,
684
+ "f1": 0.6764705882352942,
685
  "confusion_matrix": [
686
  [
687
+ 888,
688
+ 11
689
  ],
690
  [
691
+ 33,
692
+ 46
693
  ]
694
  ],
695
  "classification_report": {
696
  "NOT_RELEVANT": {
697
+ "precision": 0.9641693811074918,
698
+ "recall": 0.9877641824249166,
699
+ "f1-score": 0.9758241758241758,
700
  "support": 899.0
701
  },
702
  "RELEVANT": {
703
+ "precision": 0.8070175438596491,
704
+ "recall": 0.5822784810126582,
705
+ "f1-score": 0.6764705882352942,
706
  "support": 79.0
707
  },
708
+ "accuracy": 0.9550102249488752,
709
  "macro avg": {
710
+ "precision": 0.8855934624835704,
711
+ "recall": 0.7850213317187874,
712
+ "f1-score": 0.8261473820297349,
713
  "support": 978.0
714
  },
715
  "weighted avg": {
716
+ "precision": 0.9514751120455496,
717
+ "recall": 0.9550102249488752,
718
+ "f1-score": 0.9516432623072826,
719
  "support": 978.0
720
  }
721
  },
722
+ "roc_auc": 0.9524506836006251,
723
+ "average_precision": 0.7542419360138435
724
  },
725
  "validation_optimal_threshold": {
726
+ "threshold": 0.30975184413575924,
727
+ "accuracy": 0.9570552147239264,
728
+ "precision": 0.7126436781609196,
729
+ "recall": 0.7848101265822784,
730
+ "f1": 0.7469879518072289,
731
  "confusion_matrix": [
732
  [
733
+ 874,
734
+ 25
735
  ],
736
  [
737
+ 17,
738
+ 62
739
  ]
740
  ],
741
  "classification_report": {
742
  "NOT_RELEVANT": {
743
+ "precision": 0.9809203142536476,
744
+ "recall": 0.9721913236929922,
745
+ "f1-score": 0.976536312849162,
746
  "support": 899.0
747
  },
748
  "RELEVANT": {
749
+ "precision": 0.7126436781609196,
750
+ "recall": 0.7848101265822784,
751
+ "f1-score": 0.7469879518072289,
752
  "support": 79.0
753
  },
754
+ "accuracy": 0.9570552147239264,
755
  "macro avg": {
756
+ "precision": 0.8467819962072836,
757
+ "recall": 0.8785007251376353,
758
+ "f1-score": 0.8617621323281954,
759
  "support": 978.0
760
  },
761
  "weighted avg": {
762
+ "precision": 0.9592497066347054,
763
+ "recall": 0.9570552147239264,
764
+ "f1-score": 0.9579940628263474,
765
  "support": 978.0
766
  }
767
  },
768
+ "roc_auc": 0.9524506836006251,
769
+ "average_precision": 0.7542419360138435
770
  },
771
  "test_default_0_5": {
772
  "threshold": 0.5,
773
+ "accuracy": 0.9301181102362205,
774
+ "precision": 0.7411764705882353,
775
+ "recall": 0.5625,
776
+ "f1": 0.6395939086294417,
777
  "confusion_matrix": [
778
  [
779
  882,
780
  22
781
  ],
782
  [
783
+ 49,
784
+ 63
785
  ]
786
  ],
787
  "classification_report": {
788
  "NOT_RELEVANT": {
789
+ "precision": 0.9473684210526315,
790
  "recall": 0.9756637168141593,
791
+ "f1-score": 0.9613079019073569,
792
  "support": 904.0
793
  },
794
  "RELEVANT": {
795
+ "precision": 0.7411764705882353,
796
+ "recall": 0.5625,
797
+ "f1-score": 0.6395939086294417,
798
  "support": 112.0
799
  },
800
+ "accuracy": 0.9301181102362205,
801
  "macro avg": {
802
+ "precision": 0.8442724458204334,
803
+ "recall": 0.7690818584070797,
804
+ "f1-score": 0.8004509052683992,
805
  "support": 1016.0
806
  },
807
  "weighted avg": {
808
+ "precision": 0.9246385997415957,
809
+ "recall": 0.9301181102362205,
810
+ "f1-score": 0.9258433672153032,
811
  "support": 1016.0
812
  }
813
  },
814
+ "roc_auc": 0.9563744469026548,
815
+ "average_precision": 0.7035914186137721
816
  },
817
  "test_optimal_threshold": {
818
+ "threshold": 0.30975184413575924,
819
+ "accuracy": 0.9340551181102362,
820
+ "precision": 0.6859504132231405,
821
+ "recall": 0.7410714285714286,
822
+ "f1": 0.7124463519313304,
823
  "confusion_matrix": [
824
  [
825
+ 866,
826
+ 38
827
  ],
828
  [
829
+ 29,
830
+ 83
831
  ]
832
  ],
833
  "classification_report": {
834
  "NOT_RELEVANT": {
835
+ "precision": 0.9675977653631285,
836
+ "recall": 0.9579646017699115,
837
+ "f1-score": 0.962757087270706,
838
  "support": 904.0
839
  },
840
  "RELEVANT": {
841
+ "precision": 0.6859504132231405,
842
+ "recall": 0.7410714285714286,
843
+ "f1-score": 0.7124463519313304,
844
  "support": 112.0
845
  },
846
+ "accuracy": 0.9340551181102362,
847
  "macro avg": {
848
+ "precision": 0.8267740892931346,
849
+ "recall": 0.84951801517067,
850
+ "f1-score": 0.8376017196010181,
851
  "support": 1016.0
852
  },
853
  "weighted avg": {
854
+ "precision": 0.9365500257571455,
855
+ "recall": 0.9340551181102362,
856
+ "f1-score": 0.9351637778632157,
857
  "support": 1016.0
858
  }
859
  },
860
+ "roc_auc": 0.9563744469026548,
861
+ "average_precision": 0.7035914186137721
862
  }
863
  },
864
  {
 
868
  "artifact_dir": "/content/agri-utilization-classifier/baselines/embedding-lightgbm",
869
  "artifact_file": "/content/agri-utilization-classifier/baselines/embedding-lightgbm/embedding-lightgbm.joblib",
870
  "validation_best_threshold": {
871
+ "threshold": 0.042041465431985434,
872
+ "f1": 0.7283236994219654,
873
+ "precision": 0.6702127659574468,
874
+ "recall": 0.7974683544303798
875
  },
876
  "validation_default_0_5": {
877
  "threshold": 0.5,
878
+ "accuracy": 0.9539877300613497,
879
+ "precision": 0.75,
880
+ "recall": 0.6455696202531646,
881
+ "f1": 0.6938775510204082,
882
  "confusion_matrix": [
883
  [
884
+ 882,
885
+ 17
886
  ],
887
  [
888
+ 28,
889
+ 51
890
  ]
891
  ],
892
  "classification_report": {
893
  "NOT_RELEVANT": {
894
+ "precision": 0.9692307692307692,
895
+ "recall": 0.9810901001112347,
896
+ "f1-score": 0.9751243781094527,
897
  "support": 899.0
898
  },
899
  "RELEVANT": {
900
+ "precision": 0.75,
901
+ "recall": 0.6455696202531646,
902
+ "f1-score": 0.6938775510204082,
903
  "support": 79.0
904
  },
905
+ "accuracy": 0.9539877300613497,
906
  "macro avg": {
907
+ "precision": 0.8596153846153847,
908
+ "recall": 0.8133298601821997,
909
+ "f1-score": 0.8345009645649304,
910
  "support": 978.0
911
  },
912
  "weighted avg": {
913
+ "precision": 0.9515219443133554,
914
+ "recall": 0.9539877300613497,
915
+ "f1-score": 0.9524060761257774,
916
  "support": 978.0
917
  }
918
  },
919
+ "roc_auc": 0.9480716971036736,
920
+ "average_precision": 0.7818499996214695
921
  },
922
  "validation_optimal_threshold": {
923
+ "threshold": 0.042041465431985434,
924
+ "accuracy": 0.9519427402862985,
925
+ "precision": 0.6702127659574468,
926
+ "recall": 0.7974683544303798,
927
+ "f1": 0.7283236994219653,
928
  "confusion_matrix": [
929
  [
930
+ 868,
931
+ 31
932
  ],
933
  [
934
+ 16,
935
+ 63
936
  ]
937
  ],
938
  "classification_report": {
939
  "NOT_RELEVANT": {
940
+ "precision": 0.9819004524886877,
941
+ "recall": 0.9655172413793104,
942
+ "f1-score": 0.9736399326977005,
943
  "support": 899.0
944
  },
945
  "RELEVANT": {
946
+ "precision": 0.6702127659574468,
947
+ "recall": 0.7974683544303798,
948
+ "f1-score": 0.7283236994219653,
949
  "support": 79.0
950
  },
951
+ "accuracy": 0.9519427402862985,
952
  "macro avg": {
953
+ "precision": 0.8260566092230672,
954
+ "recall": 0.881492797904845,
955
+ "f1-score": 0.8509818160598329,
956
  "support": 978.0
957
  },
958
  "weighted avg": {
959
+ "precision": 0.9567232262760416,
960
+ "recall": 0.9519427402862985,
961
+ "f1-score": 0.9538239997439346,
962
  "support": 978.0
963
  }
964
  },
965
+ "roc_auc": 0.9480716971036736,
966
+ "average_precision": 0.7818499996214695
967
  },
968
  "test_default_0_5": {
969
  "threshold": 0.5,
970
+ "accuracy": 0.937007874015748,
971
+ "precision": 0.74,
972
+ "recall": 0.6607142857142857,
973
+ "f1": 0.6981132075471698,
974
  "confusion_matrix": [
975
  [
976
+ 878,
977
+ 26
978
  ],
979
  [
980
+ 38,
981
+ 74
982
  ]
983
  ],
984
  "classification_report": {
985
  "NOT_RELEVANT": {
986
+ "precision": 0.9585152838427947,
987
+ "recall": 0.9712389380530974,
988
+ "f1-score": 0.9648351648351648,
989
  "support": 904.0
990
  },
991
  "RELEVANT": {
992
+ "precision": 0.74,
993
+ "recall": 0.6607142857142857,
994
+ "f1-score": 0.6981132075471698,
995
  "support": 112.0
996
  },
997
+ "accuracy": 0.937007874015748,
998
  "macro avg": {
999
+ "precision": 0.8492576419213973,
1000
+ "recall": 0.8159766118836915,
1001
+ "f1-score": 0.8314741861911673,
1002
  "support": 1016.0
1003
  },
1004
  "weighted avg": {
1005
+ "precision": 0.9344269848365024,
1006
+ "recall": 0.937007874015748,
1007
+ "f1-score": 0.9354327443467244,
1008
  "support": 1016.0
1009
  }
1010
  },
1011
+ "roc_auc": 0.9597819216182049,
1012
+ "average_precision": 0.7911233572387708
1013
  },
1014
  "test_optimal_threshold": {
1015
+ "threshold": 0.042041465431985434,
1016
+ "accuracy": 0.9291338582677166,
1017
+ "precision": 0.6388888888888888,
1018
  "recall": 0.8214285714285714,
1019
+ "f1": 0.71875,
1020
  "confusion_matrix": [
1021
  [
1022
+ 852,
1023
+ 52
1024
  ],
1025
  [
1026
  20,
 
1029
  ],
1030
  "classification_report": {
1031
  "NOT_RELEVANT": {
1032
+ "precision": 0.9770642201834863,
1033
+ "recall": 0.9424778761061947,
1034
+ "f1-score": 0.9594594594594594,
1035
  "support": 904.0
1036
  },
1037
  "RELEVANT": {
1038
+ "precision": 0.6388888888888888,
1039
  "recall": 0.8214285714285714,
1040
+ "f1-score": 0.71875,
1041
  "support": 112.0
1042
  },
1043
+ "accuracy": 0.9291338582677166,
1044
  "macro avg": {
1045
+ "precision": 0.8079765545361876,
1046
+ "recall": 0.881953223767383,
1047
+ "f1-score": 0.8391047297297297,
1048
  "support": 1016.0
1049
  },
1050
  "weighted avg": {
1051
+ "precision": 0.9397850498045542,
1052
+ "recall": 0.9291338582677166,
1053
+ "f1-score": 0.9329245584166844,
1054
  "support": 1016.0
1055
  }
1056
  },
1057
+ "roc_auc": 0.9597819216182049,
1058
+ "average_precision": 0.7911233572387708
1059
  }
1060
  },
1061
  {
 
1063
  "model_name": "FacebookAI/xlm-roberta-base",
1064
  "artifact_dir": "/content/agri-utilization-classifier/transformer",
1065
  "validation_best_threshold": {
1066
+ "threshold": 0.4710787534713745,
1067
+ "f1": 0.829268292682927,
1068
+ "precision": 0.8,
1069
+ "recall": 0.8607594936708861
1070
  },
1071
  "validation_default_0_5": {
1072
  "threshold": 0.5,
1073
+ "accuracy": 0.9703476482617587,
1074
+ "precision": 0.7976190476190477,
1075
+ "recall": 0.8481012658227848,
1076
+ "f1": 0.8220858895705522,
1077
  "confusion_matrix": [
1078
  [
1079
+ 882,
1080
+ 17
1081
  ],
1082
  [
1083
+ 12,
1084
+ 67
1085
  ]
1086
  ],
1087
  "classification_report": {
1088
  "NOT_RELEVANT": {
1089
+ "precision": 0.9865771812080537,
1090
+ "recall": 0.9810901001112347,
1091
+ "f1-score": 0.9838259899609593,
1092
  "support": 899.0
1093
  },
1094
  "RELEVANT": {
1095
+ "precision": 0.7976190476190477,
1096
+ "recall": 0.8481012658227848,
1097
+ "f1-score": 0.8220858895705522,
1098
  "support": 79.0
1099
  },
1100
+ "accuracy": 0.9703476482617587,
1101
  "macro avg": {
1102
+ "precision": 0.8920981144135507,
1103
+ "recall": 0.9145956829670097,
1104
+ "f1-score": 0.9029559397657557,
1105
  "support": 978.0
1106
  },
1107
  "weighted avg": {
1108
+ "precision": 0.9713136918895144,
1109
+ "recall": 0.9703476482617587,
1110
+ "f1-score": 0.9707610943261513,
1111
  "support": 978.0
1112
  }
1113
  },
1114
+ "roc_auc": 0.9661086157615353,
1115
+ "average_precision": 0.8539255147550682
1116
  },
1117
  "validation_optimal_threshold": {
1118
+ "threshold": 0.4710787534713745,
1119
+ "accuracy": 0.9713701431492843,
1120
+ "precision": 0.8,
1121
+ "recall": 0.8607594936708861,
1122
+ "f1": 0.8292682926829268,
1123
  "confusion_matrix": [
1124
  [
1125
+ 882,
1126
+ 17
1127
  ],
1128
  [
1129
+ 11,
1130
+ 68
1131
  ]
1132
  ],
1133
  "classification_report": {
1134
  "NOT_RELEVANT": {
1135
+ "precision": 0.9876819708846585,
1136
+ "recall": 0.9810901001112347,
1137
+ "f1-score": 0.984375,
1138
  "support": 899.0
1139
  },
1140
  "RELEVANT": {
1141
+ "precision": 0.8,
1142
+ "recall": 0.8607594936708861,
1143
+ "f1-score": 0.8292682926829268,
1144
  "support": 79.0
1145
  },
1146
+ "accuracy": 0.9713701431492843,
1147
  "macro avg": {
1148
+ "precision": 0.8938409854423293,
1149
+ "recall": 0.9209247968910603,
1150
+ "f1-score": 0.9068216463414633,
1151
  "support": 978.0
1152
  },
1153
  "weighted avg": {
1154
+ "precision": 0.972521566283546,
1155
+ "recall": 0.9713701431492843,
1156
+ "f1-score": 0.9718459305950421,
1157
  "support": 978.0
1158
  }
1159
  },
1160
+ "roc_auc": 0.9661086157615353,
1161
+ "average_precision": 0.8539255147550682
1162
  },
1163
  "test_default_0_5": {
1164
  "threshold": 0.5,
1165
+ "accuracy": 0.9507874015748031,
1166
+ "precision": 0.7767857142857143,
1167
+ "recall": 0.7767857142857143,
1168
+ "f1": 0.7767857142857143,
1169
  "confusion_matrix": [
1170
  [
1171
+ 879,
1172
+ 25
1173
  ],
1174
  [
1175
+ 25,
1176
+ 87
1177
  ]
1178
  ],
1179
  "classification_report": {
1180
  "NOT_RELEVANT": {
1181
+ "precision": 0.9723451327433629,
1182
+ "recall": 0.9723451327433629,
1183
+ "f1-score": 0.9723451327433629,
1184
  "support": 904.0
1185
  },
1186
  "RELEVANT": {
1187
+ "precision": 0.7767857142857143,
1188
+ "recall": 0.7767857142857143,
1189
+ "f1-score": 0.7767857142857143,
1190
  "support": 112.0
1191
  },
1192
+ "accuracy": 0.9507874015748031,
1193
  "macro avg": {
1194
+ "precision": 0.8745654235145386,
1195
+ "recall": 0.8745654235145386,
1196
+ "f1-score": 0.8745654235145386,
1197
  "support": 1016.0
1198
  },
1199
  "weighted avg": {
1200
+ "precision": 0.9507874015748031,
1201
+ "recall": 0.9507874015748031,
1202
+ "f1-score": 0.9507874015748031,
1203
  "support": 1016.0
1204
  }
1205
  },
1206
+ "roc_auc": 0.9682512247155499,
1207
+ "average_precision": 0.8171206633671375
1208
  },
1209
  "test_optimal_threshold": {
1210
+ "threshold": 0.4710787534713745,
1211
+ "accuracy": 0.9498031496062992,
1212
+ "precision": 0.7699115044247787,
1213
+ "recall": 0.7767857142857143,
1214
+ "f1": 0.7733333333333333,
1215
  "confusion_matrix": [
1216
  [
1217
+ 878,
1218
+ 26
1219
  ],
1220
  [
1221
+ 25,
1222
+ 87
1223
  ]
1224
  ],
1225
  "classification_report": {
1226
  "NOT_RELEVANT": {
1227
+ "precision": 0.9723145071982281,
1228
+ "recall": 0.9712389380530974,
1229
+ "f1-score": 0.9717764250138351,
1230
  "support": 904.0
1231
  },
1232
  "RELEVANT": {
1233
+ "precision": 0.7699115044247787,
1234
+ "recall": 0.7767857142857143,
1235
+ "f1-score": 0.7733333333333333,
1236
  "support": 112.0
1237
  },
1238
+ "accuracy": 0.9498031496062992,
1239
  "macro avg": {
1240
+ "precision": 0.8711130058115034,
1241
+ "recall": 0.8740123261694058,
1242
+ "f1-score": 0.8725548791735842,
1243
  "support": 1016.0
1244
  },
1245
  "weighted avg": {
1246
+ "precision": 0.9500023651602102,
1247
+ "recall": 0.9498031496062992,
1248
+ "f1-score": 0.9499008086081104,
1249
  "support": 1016.0
1250
  }
1251
  },
1252
+ "roc_auc": 0.9682512247155499,
1253
+ "average_precision": 0.8171206633671375
1254
  }
1255
  }
1256
  ]
transformer/checkpoint-1220/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:572ab7a5b2bc6140bc72ff08cc111f90496ceab40c64330fc6373973a0b6830c
3
  size 1112205008
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b248f60ff3e153b28949243967a2debde809912442c1ef5fe19d89dad891f1f9
3
  size 1112205008
transformer/checkpoint-1220/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ce55d9becbab31f17b7cfca4a308defc4a0d3875c852bc0df4b57343af2e439a
3
  size 2224532875
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f731780f8bff3652e23ff4cf1692c96c1068919f515c85113ffd987765be34ce
3
  size 2224532875
transformer/checkpoint-1220/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d3fd1def138c5a78584037782d4486580ca45db784f5e2a18955179628a8a257
3
  size 1383
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f2e11cad5f2deee13f6148971cf1c6ded27d5cbdc725a37902243981a6125a17
3
  size 1383
transformer/checkpoint-1220/trainer_state.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
- "best_global_step": 1220,
3
- "best_metric": 0.8414634146341463,
4
- "best_model_checkpoint": "/content/agri-utilization-classifier/transformer/checkpoint-1220",
5
  "epoch": 4.0,
6
  "eval_steps": 500,
7
  "global_step": 1220,
@@ -11,390 +11,390 @@
11
  "log_history": [
12
  {
13
  "epoch": 0.08196721311475409,
14
- "grad_norm": Infinity,
15
  "learning_rate": 3.157894736842105e-06,
16
- "loss": 0.7012384033203125,
17
  "step": 25
18
  },
19
  {
20
  "epoch": 0.16393442622950818,
21
- "grad_norm": 11.968428611755371,
22
  "learning_rate": 6.447368421052632e-06,
23
- "loss": 0.4254766845703125,
24
  "step": 50
25
  },
26
  {
27
  "epoch": 0.2459016393442623,
28
- "grad_norm": 19.943565368652344,
29
  "learning_rate": 9.736842105263159e-06,
30
- "loss": 0.3554811859130859,
31
  "step": 75
32
  },
33
  {
34
  "epoch": 0.32786885245901637,
35
- "grad_norm": 5.117671489715576,
36
  "learning_rate": 1.3026315789473684e-05,
37
- "loss": 0.3145046615600586,
38
  "step": 100
39
  },
40
  {
41
  "epoch": 0.4098360655737705,
42
- "grad_norm": 11.402164459228516,
43
  "learning_rate": 1.6315789473684213e-05,
44
- "loss": 0.2773847770690918,
45
  "step": 125
46
  },
47
  {
48
  "epoch": 0.4918032786885246,
49
- "grad_norm": 8.077093124389648,
50
  "learning_rate": 1.960526315789474e-05,
51
- "loss": 0.2556156539916992,
52
  "step": 150
53
  },
54
  {
55
  "epoch": 0.5737704918032787,
56
- "grad_norm": 2.0055508613586426,
57
  "learning_rate": 1.9679533867443555e-05,
58
- "loss": 0.25026893615722656,
59
  "step": 175
60
  },
61
  {
62
  "epoch": 0.6557377049180327,
63
- "grad_norm": 0.9293265342712402,
64
  "learning_rate": 1.9315367807720323e-05,
65
- "loss": 0.24691852569580078,
66
  "step": 200
67
  },
68
  {
69
  "epoch": 0.7377049180327869,
70
- "grad_norm": 1.4303064346313477,
71
  "learning_rate": 1.8951201747997088e-05,
72
- "loss": 0.21208112716674804,
73
  "step": 225
74
  },
75
  {
76
  "epoch": 0.819672131147541,
77
- "grad_norm": 4.035928249359131,
78
  "learning_rate": 1.8587035688273852e-05,
79
- "loss": 0.17017290115356445,
80
  "step": 250
81
  },
82
  {
83
  "epoch": 0.9016393442622951,
84
- "grad_norm": 4.241303443908691,
85
  "learning_rate": 1.822286962855062e-05,
86
- "loss": 0.16115386962890624,
87
  "step": 275
88
  },
89
  {
90
  "epoch": 0.9836065573770492,
91
- "grad_norm": 0.3990240693092346,
92
  "learning_rate": 1.7858703568827385e-05,
93
- "loss": 0.18343988418579102,
94
  "step": 300
95
  },
96
  {
97
  "epoch": 1.0,
98
- "eval_accuracy": 0.9642126789366053,
99
- "eval_f1": 0.7741935483870968,
100
- "eval_loss": 0.14546315371990204,
101
- "eval_precision": 0.7894736842105263,
102
- "eval_recall": 0.759493670886076,
103
- "eval_roc_auc": 0.9131946888948339,
104
- "eval_runtime": 3.5921,
105
- "eval_samples_per_second": 272.266,
106
- "eval_steps_per_second": 8.63,
107
  "step": 305
108
  },
109
  {
110
  "epoch": 1.0655737704918034,
111
- "grad_norm": 4.865096569061279,
112
  "learning_rate": 1.7494537509104153e-05,
113
- "loss": 0.19335979461669922,
114
  "step": 325
115
  },
116
  {
117
  "epoch": 1.1475409836065573,
118
- "grad_norm": 1.9780689477920532,
119
  "learning_rate": 1.7130371449380918e-05,
120
- "loss": 0.2344082260131836,
121
  "step": 350
122
  },
123
  {
124
  "epoch": 1.2295081967213115,
125
- "grad_norm": 1.3759413957595825,
126
  "learning_rate": 1.6766205389657686e-05,
127
- "loss": 0.20309404373168946,
128
  "step": 375
129
  },
130
  {
131
  "epoch": 1.3114754098360657,
132
- "grad_norm": 0.30811628699302673,
133
  "learning_rate": 1.640203932993445e-05,
134
- "loss": 0.224365234375,
135
  "step": 400
136
  },
137
  {
138
  "epoch": 1.3934426229508197,
139
- "grad_norm": 5.530014514923096,
140
  "learning_rate": 1.603787327021122e-05,
141
- "loss": 0.14759160995483397,
142
  "step": 425
143
  },
144
  {
145
  "epoch": 1.4754098360655736,
146
- "grad_norm": 3.7750189304351807,
147
  "learning_rate": 1.5673707210487983e-05,
148
- "loss": 0.14137668609619142,
149
  "step": 450
150
  },
151
  {
152
  "epoch": 1.5573770491803278,
153
- "grad_norm": 1.8209186792373657,
154
  "learning_rate": 1.530954115076475e-05,
155
- "loss": 0.19394855499267577,
156
  "step": 475
157
  },
158
  {
159
  "epoch": 1.639344262295082,
160
- "grad_norm": 0.4824683368206024,
161
  "learning_rate": 1.4945375091041516e-05,
162
- "loss": 0.1700056266784668,
163
  "step": 500
164
  },
165
  {
166
  "epoch": 1.721311475409836,
167
- "grad_norm": 0.5682937502861023,
168
  "learning_rate": 1.4581209031318282e-05,
169
- "loss": 0.17243267059326173,
170
  "step": 525
171
  },
172
  {
173
  "epoch": 1.8032786885245902,
174
- "grad_norm": 2.2086634635925293,
175
  "learning_rate": 1.4217042971595047e-05,
176
- "loss": 0.15430424690246583,
177
  "step": 550
178
  },
179
  {
180
  "epoch": 1.8852459016393444,
181
- "grad_norm": 6.93908166885376,
182
  "learning_rate": 1.3852876911871815e-05,
183
- "loss": 0.10752416610717773,
184
  "step": 575
185
  },
186
  {
187
  "epoch": 1.9672131147540983,
188
- "grad_norm": 5.092395782470703,
189
  "learning_rate": 1.3488710852148582e-05,
190
- "loss": 0.21721889495849608,
191
  "step": 600
192
  },
193
  {
194
  "epoch": 2.0,
195
- "eval_accuracy": 0.9570552147239264,
196
- "eval_f1": 0.7692307692307693,
197
- "eval_loss": 0.12331932783126831,
198
- "eval_precision": 0.6796116504854369,
199
- "eval_recall": 0.8860759493670886,
200
- "eval_roc_auc": 0.9679672209628138,
201
- "eval_runtime": 3.5595,
202
- "eval_samples_per_second": 274.754,
203
- "eval_steps_per_second": 8.709,
204
  "step": 610
205
  },
206
  {
207
  "epoch": 2.0491803278688523,
208
- "grad_norm": 12.040640830993652,
209
  "learning_rate": 1.3124544792425346e-05,
210
- "loss": 0.11267939567565918,
211
  "step": 625
212
  },
213
  {
214
  "epoch": 2.1311475409836067,
215
- "grad_norm": 0.33291733264923096,
216
  "learning_rate": 1.2760378732702113e-05,
217
- "loss": 0.16029356002807618,
218
  "step": 650
219
  },
220
  {
221
  "epoch": 2.2131147540983607,
222
- "grad_norm": 0.1562187671661377,
223
  "learning_rate": 1.239621267297888e-05,
224
- "loss": 0.13354766845703125,
225
  "step": 675
226
  },
227
  {
228
  "epoch": 2.2950819672131146,
229
- "grad_norm": 0.39492854475975037,
230
  "learning_rate": 1.2032046613255645e-05,
231
- "loss": 0.08748809814453125,
232
  "step": 700
233
  },
234
  {
235
  "epoch": 2.3770491803278686,
236
- "grad_norm": 0.22857463359832764,
237
  "learning_rate": 1.1667880553532412e-05,
238
- "loss": 0.1255797290802002,
239
  "step": 725
240
  },
241
  {
242
  "epoch": 2.459016393442623,
243
- "grad_norm": 42.23853302001953,
244
  "learning_rate": 1.1303714493809176e-05,
245
- "loss": 0.09398910522460938,
246
  "step": 750
247
  },
248
  {
249
  "epoch": 2.540983606557377,
250
- "grad_norm": 9.628519058227539,
251
  "learning_rate": 1.0939548434085944e-05,
252
- "loss": 0.12067486763000489,
253
  "step": 775
254
  },
255
  {
256
  "epoch": 2.6229508196721314,
257
- "grad_norm": 8.281865119934082,
258
  "learning_rate": 1.057538237436271e-05,
259
- "loss": 0.0960771656036377,
260
  "step": 800
261
  },
262
  {
263
  "epoch": 2.7049180327868854,
264
- "grad_norm": 0.2366073578596115,
265
  "learning_rate": 1.0211216314639475e-05,
266
- "loss": 0.1477354335784912,
267
  "step": 825
268
  },
269
  {
270
  "epoch": 2.7868852459016393,
271
- "grad_norm": 2.127614974975586,
272
  "learning_rate": 9.847050254916243e-06,
273
- "loss": 0.12143749237060547,
274
  "step": 850
275
  },
276
  {
277
  "epoch": 2.8688524590163933,
278
- "grad_norm": 0.1283058375120163,
279
  "learning_rate": 9.482884195193008e-06,
280
- "loss": 0.0978905963897705,
281
  "step": 875
282
  },
283
  {
284
  "epoch": 2.9508196721311473,
285
- "grad_norm": 0.16377978026866913,
286
  "learning_rate": 9.118718135469774e-06,
287
- "loss": 0.13501665115356445,
288
  "step": 900
289
  },
290
  {
291
  "epoch": 3.0,
292
- "eval_accuracy": 0.9683026584867076,
293
- "eval_f1": 0.8143712574850299,
294
- "eval_loss": 0.11399859189987183,
295
- "eval_precision": 0.7727272727272727,
296
- "eval_recall": 0.8607594936708861,
297
- "eval_roc_auc": 0.9685867560299067,
298
- "eval_runtime": 3.583,
299
- "eval_samples_per_second": 272.954,
300
- "eval_steps_per_second": 8.652,
301
  "step": 915
302
  },
303
  {
304
  "epoch": 3.0327868852459017,
305
- "grad_norm": 0.36435577273368835,
306
  "learning_rate": 8.754552075746541e-06,
307
- "loss": 0.12846416473388672,
308
  "step": 925
309
  },
310
  {
311
  "epoch": 3.1147540983606556,
312
- "grad_norm": 0.12181571871042252,
313
  "learning_rate": 8.390386016023307e-06,
314
- "loss": 0.07323605537414551,
315
  "step": 950
316
  },
317
  {
318
  "epoch": 3.19672131147541,
319
- "grad_norm": 0.08613187074661255,
320
  "learning_rate": 8.026219956300074e-06,
321
- "loss": 0.11347267150878906,
322
  "step": 975
323
  },
324
  {
325
  "epoch": 3.278688524590164,
326
- "grad_norm": 2.452489137649536,
327
  "learning_rate": 7.66205389657684e-06,
328
- "loss": 0.07726279258728028,
329
  "step": 1000
330
  },
331
  {
332
  "epoch": 3.360655737704918,
333
- "grad_norm": 0.05209459364414215,
334
  "learning_rate": 7.2978878368536055e-06,
335
- "loss": 0.07418290138244629,
336
  "step": 1025
337
  },
338
  {
339
  "epoch": 3.442622950819672,
340
- "grad_norm": 20.379858016967773,
341
  "learning_rate": 6.933721777130372e-06,
342
- "loss": 0.12044317245483399,
343
  "step": 1050
344
  },
345
  {
346
  "epoch": 3.5245901639344264,
347
- "grad_norm": 0.8267766237258911,
348
  "learning_rate": 6.569555717407138e-06,
349
- "loss": 0.07422394752502441,
350
  "step": 1075
351
  },
352
  {
353
  "epoch": 3.6065573770491803,
354
- "grad_norm": 0.18650104105472565,
355
  "learning_rate": 6.2053896576839045e-06,
356
- "loss": 0.07682370185852051,
357
  "step": 1100
358
  },
359
  {
360
  "epoch": 3.6885245901639343,
361
- "grad_norm": 0.07509063929319382,
362
  "learning_rate": 5.84122359796067e-06,
363
- "loss": 0.06737090110778808,
364
  "step": 1125
365
  },
366
  {
367
  "epoch": 3.7704918032786887,
368
- "grad_norm": 16.239089965820312,
369
  "learning_rate": 5.477057538237437e-06,
370
- "loss": 0.0853554630279541,
371
  "step": 1150
372
  },
373
  {
374
  "epoch": 3.8524590163934427,
375
- "grad_norm": 0.07212834060192108,
376
  "learning_rate": 5.112891478514203e-06,
377
- "loss": 0.07101381301879883,
378
  "step": 1175
379
  },
380
  {
381
  "epoch": 3.9344262295081966,
382
- "grad_norm": 0.05511339381337166,
383
  "learning_rate": 4.748725418790969e-06,
384
- "loss": 0.09712701797485351,
385
  "step": 1200
386
  },
387
  {
388
  "epoch": 4.0,
389
- "eval_accuracy": 0.9734151329243353,
390
- "eval_f1": 0.8414634146341463,
391
- "eval_loss": 0.11315910518169403,
392
- "eval_precision": 0.8117647058823529,
393
- "eval_recall": 0.8734177215189873,
394
- "eval_roc_auc": 0.9707692091071654,
395
- "eval_runtime": 3.7125,
396
- "eval_samples_per_second": 263.434,
397
- "eval_steps_per_second": 8.35,
398
  "step": 1220
399
  }
400
  ],
@@ -410,7 +410,7 @@
410
  "early_stopping_threshold": 0.0
411
  },
412
  "attributes": {
413
- "early_stopping_patience_counter": 0
414
  }
415
  },
416
  "TrainerControl": {
 
1
  {
2
+ "best_global_step": 915,
3
+ "best_metric": 0.8220858895705522,
4
+ "best_model_checkpoint": "/content/agri-utilization-classifier/transformer/checkpoint-915",
5
  "epoch": 4.0,
6
  "eval_steps": 500,
7
  "global_step": 1220,
 
11
  "log_history": [
12
  {
13
  "epoch": 0.08196721311475409,
14
+ "grad_norm": 6.055062770843506,
15
  "learning_rate": 3.157894736842105e-06,
16
+ "loss": 0.62972900390625,
17
  "step": 25
18
  },
19
  {
20
  "epoch": 0.16393442622950818,
21
+ "grad_norm": 10.6914701461792,
22
  "learning_rate": 6.447368421052632e-06,
23
+ "loss": 0.44850738525390627,
24
  "step": 50
25
  },
26
  {
27
  "epoch": 0.2459016393442623,
28
+ "grad_norm": 6.670228481292725,
29
  "learning_rate": 9.736842105263159e-06,
30
+ "loss": 0.3566379165649414,
31
  "step": 75
32
  },
33
  {
34
  "epoch": 0.32786885245901637,
35
+ "grad_norm": 2.589911937713623,
36
  "learning_rate": 1.3026315789473684e-05,
37
+ "loss": 0.2718839645385742,
38
  "step": 100
39
  },
40
  {
41
  "epoch": 0.4098360655737705,
42
+ "grad_norm": 22.02676773071289,
43
  "learning_rate": 1.6315789473684213e-05,
44
+ "loss": 0.1922766876220703,
45
  "step": 125
46
  },
47
  {
48
  "epoch": 0.4918032786885246,
49
+ "grad_norm": 2.6362855434417725,
50
  "learning_rate": 1.960526315789474e-05,
51
+ "loss": 0.1837622833251953,
52
  "step": 150
53
  },
54
  {
55
  "epoch": 0.5737704918032787,
56
+ "grad_norm": 3.478484630584717,
57
  "learning_rate": 1.9679533867443555e-05,
58
+ "loss": 0.18766048431396484,
59
  "step": 175
60
  },
61
  {
62
  "epoch": 0.6557377049180327,
63
+ "grad_norm": 8.077605247497559,
64
  "learning_rate": 1.9315367807720323e-05,
65
+ "loss": 0.23830581665039063,
66
  "step": 200
67
  },
68
  {
69
  "epoch": 0.7377049180327869,
70
+ "grad_norm": 0.7427046298980713,
71
  "learning_rate": 1.8951201747997088e-05,
72
+ "loss": 0.30742517471313474,
73
  "step": 225
74
  },
75
  {
76
  "epoch": 0.819672131147541,
77
+ "grad_norm": 36.34975051879883,
78
  "learning_rate": 1.8587035688273852e-05,
79
+ "loss": 0.22336017608642578,
80
  "step": 250
81
  },
82
  {
83
  "epoch": 0.9016393442622951,
84
+ "grad_norm": 5.215510845184326,
85
  "learning_rate": 1.822286962855062e-05,
86
+ "loss": 0.13779294967651368,
87
  "step": 275
88
  },
89
  {
90
  "epoch": 0.9836065573770492,
91
+ "grad_norm": 3.551121950149536,
92
  "learning_rate": 1.7858703568827385e-05,
93
+ "loss": 0.19200111389160157,
94
  "step": 300
95
  },
96
  {
97
  "epoch": 1.0,
98
+ "eval_accuracy": 0.9631901840490797,
99
+ "eval_f1": 0.7721518987341772,
100
+ "eval_loss": 0.1292734444141388,
101
+ "eval_precision": 0.7721518987341772,
102
+ "eval_recall": 0.7721518987341772,
103
+ "eval_roc_auc": 0.9563720589684741,
104
+ "eval_runtime": 3.3396,
105
+ "eval_samples_per_second": 292.853,
106
+ "eval_steps_per_second": 9.283,
107
  "step": 305
108
  },
109
  {
110
  "epoch": 1.0655737704918034,
111
+ "grad_norm": 0.5402449369430542,
112
  "learning_rate": 1.7494537509104153e-05,
113
+ "loss": 0.1241053295135498,
114
  "step": 325
115
  },
116
  {
117
  "epoch": 1.1475409836065573,
118
+ "grad_norm": 4.476892948150635,
119
  "learning_rate": 1.7130371449380918e-05,
120
+ "loss": 0.20724605560302733,
121
  "step": 350
122
  },
123
  {
124
  "epoch": 1.2295081967213115,
125
+ "grad_norm": 0.46729782223701477,
126
  "learning_rate": 1.6766205389657686e-05,
127
+ "loss": 0.13567353248596192,
128
  "step": 375
129
  },
130
  {
131
  "epoch": 1.3114754098360657,
132
+ "grad_norm": 0.1852118819952011,
133
  "learning_rate": 1.640203932993445e-05,
134
+ "loss": 0.13295170783996582,
135
  "step": 400
136
  },
137
  {
138
  "epoch": 1.3934426229508197,
139
+ "grad_norm": 1.2681413888931274,
140
  "learning_rate": 1.603787327021122e-05,
141
+ "loss": 0.2027936363220215,
142
  "step": 425
143
  },
144
  {
145
  "epoch": 1.4754098360655736,
146
+ "grad_norm": 7.484091281890869,
147
  "learning_rate": 1.5673707210487983e-05,
148
+ "loss": 0.12364128112792969,
149
  "step": 450
150
  },
151
  {
152
  "epoch": 1.5573770491803278,
153
+ "grad_norm": 0.46489500999450684,
154
  "learning_rate": 1.530954115076475e-05,
155
+ "loss": 0.14407362937927246,
156
  "step": 475
157
  },
158
  {
159
  "epoch": 1.639344262295082,
160
+ "grad_norm": 0.20967872440814972,
161
  "learning_rate": 1.4945375091041516e-05,
162
+ "loss": 0.12458925247192383,
163
  "step": 500
164
  },
165
  {
166
  "epoch": 1.721311475409836,
167
+ "grad_norm": 0.1643747240304947,
168
  "learning_rate": 1.4581209031318282e-05,
169
+ "loss": 0.21631996154785157,
170
  "step": 525
171
  },
172
  {
173
  "epoch": 1.8032786885245902,
174
+ "grad_norm": 7.073329448699951,
175
  "learning_rate": 1.4217042971595047e-05,
176
+ "loss": 0.16043865203857421,
177
  "step": 550
178
  },
179
  {
180
  "epoch": 1.8852459016393444,
181
+ "grad_norm": 1.744958758354187,
182
  "learning_rate": 1.3852876911871815e-05,
183
+ "loss": 0.0966644287109375,
184
  "step": 575
185
  },
186
  {
187
  "epoch": 1.9672131147540983,
188
+ "grad_norm": 12.79035472869873,
189
  "learning_rate": 1.3488710852148582e-05,
190
+ "loss": 0.15884541511535644,
191
  "step": 600
192
  },
193
  {
194
  "epoch": 2.0,
195
+ "eval_accuracy": 0.9611451942740287,
196
+ "eval_f1": 0.7432432432432432,
197
+ "eval_loss": 0.13287827372550964,
198
+ "eval_precision": 0.7971014492753623,
199
+ "eval_recall": 0.6962025316455697,
200
+ "eval_roc_auc": 0.9594697343039381,
201
+ "eval_runtime": 3.2739,
202
+ "eval_samples_per_second": 298.727,
203
+ "eval_steps_per_second": 9.469,
204
  "step": 610
205
  },
206
  {
207
  "epoch": 2.0491803278688523,
208
+ "grad_norm": 17.520444869995117,
209
  "learning_rate": 1.3124544792425346e-05,
210
+ "loss": 0.08896012306213379,
211
  "step": 625
212
  },
213
  {
214
  "epoch": 2.1311475409836067,
215
+ "grad_norm": 0.16623224318027496,
216
  "learning_rate": 1.2760378732702113e-05,
217
+ "loss": 0.11752216339111328,
218
  "step": 650
219
  },
220
  {
221
  "epoch": 2.2131147540983607,
222
+ "grad_norm": 0.20762814581394196,
223
  "learning_rate": 1.239621267297888e-05,
224
+ "loss": 0.1193038272857666,
225
  "step": 675
226
  },
227
  {
228
  "epoch": 2.2950819672131146,
229
+ "grad_norm": 0.1500111073255539,
230
  "learning_rate": 1.2032046613255645e-05,
231
+ "loss": 0.0630855655670166,
232
  "step": 700
233
  },
234
  {
235
  "epoch": 2.3770491803278686,
236
+ "grad_norm": 0.17727839946746826,
237
  "learning_rate": 1.1667880553532412e-05,
238
+ "loss": 0.08730959892272949,
239
  "step": 725
240
  },
241
  {
242
  "epoch": 2.459016393442623,
243
+ "grad_norm": 4.3997321128845215,
244
  "learning_rate": 1.1303714493809176e-05,
245
+ "loss": 0.12114215850830078,
246
  "step": 750
247
  },
248
  {
249
  "epoch": 2.540983606557377,
250
+ "grad_norm": 34.47224044799805,
251
  "learning_rate": 1.0939548434085944e-05,
252
+ "loss": 0.11070786476135254,
253
  "step": 775
254
  },
255
  {
256
  "epoch": 2.6229508196721314,
257
+ "grad_norm": 25.977081298828125,
258
  "learning_rate": 1.057538237436271e-05,
259
+ "loss": 0.10845686912536621,
260
  "step": 800
261
  },
262
  {
263
  "epoch": 2.7049180327868854,
264
+ "grad_norm": 0.1657736450433731,
265
  "learning_rate": 1.0211216314639475e-05,
266
+ "loss": 0.1025285530090332,
267
  "step": 825
268
  },
269
  {
270
  "epoch": 2.7868852459016393,
271
+ "grad_norm": 34.05498504638672,
272
  "learning_rate": 9.847050254916243e-06,
273
+ "loss": 0.07825160026550293,
274
  "step": 850
275
  },
276
  {
277
  "epoch": 2.8688524590163933,
278
+ "grad_norm": 0.2868161201477051,
279
  "learning_rate": 9.482884195193008e-06,
280
+ "loss": 0.12041816711425782,
281
  "step": 875
282
  },
283
  {
284
  "epoch": 2.9508196721311473,
285
+ "grad_norm": 0.19192977249622345,
286
  "learning_rate": 9.118718135469774e-06,
287
+ "loss": 0.08709416389465333,
288
  "step": 900
289
  },
290
  {
291
  "epoch": 3.0,
292
+ "eval_accuracy": 0.9703476482617587,
293
+ "eval_f1": 0.8220858895705522,
294
+ "eval_loss": 0.11163181066513062,
295
+ "eval_precision": 0.7976190476190477,
296
+ "eval_recall": 0.8481012658227848,
297
+ "eval_roc_auc": 0.9661086157615353,
298
+ "eval_runtime": 3.1733,
299
+ "eval_samples_per_second": 308.193,
300
+ "eval_steps_per_second": 9.769,
301
  "step": 915
302
  },
303
  {
304
  "epoch": 3.0327868852459017,
305
+ "grad_norm": 1.0706992149353027,
306
  "learning_rate": 8.754552075746541e-06,
307
+ "loss": 0.10751664161682128,
308
  "step": 925
309
  },
310
  {
311
  "epoch": 3.1147540983606556,
312
+ "grad_norm": 0.12844231724739075,
313
  "learning_rate": 8.390386016023307e-06,
314
+ "loss": 0.06818144798278808,
315
  "step": 950
316
  },
317
  {
318
  "epoch": 3.19672131147541,
319
+ "grad_norm": 0.07692205160856247,
320
  "learning_rate": 8.026219956300074e-06,
321
+ "loss": 0.12229555130004882,
322
  "step": 975
323
  },
324
  {
325
  "epoch": 3.278688524590164,
326
+ "grad_norm": 1.773990511894226,
327
  "learning_rate": 7.66205389657684e-06,
328
+ "loss": 0.06936595916748046,
329
  "step": 1000
330
  },
331
  {
332
  "epoch": 3.360655737704918,
333
+ "grad_norm": 0.07844381034374237,
334
  "learning_rate": 7.2978878368536055e-06,
335
+ "loss": 0.05219663143157959,
336
  "step": 1025
337
  },
338
  {
339
  "epoch": 3.442622950819672,
340
+ "grad_norm": 12.502548217773438,
341
  "learning_rate": 6.933721777130372e-06,
342
+ "loss": 0.06849228858947753,
343
  "step": 1050
344
  },
345
  {
346
  "epoch": 3.5245901639344264,
347
+ "grad_norm": 1.6993861198425293,
348
  "learning_rate": 6.569555717407138e-06,
349
+ "loss": 0.08783550262451172,
350
  "step": 1075
351
  },
352
  {
353
  "epoch": 3.6065573770491803,
354
+ "grad_norm": 0.06551510095596313,
355
  "learning_rate": 6.2053896576839045e-06,
356
+ "loss": 0.049420347213745115,
357
  "step": 1100
358
  },
359
  {
360
  "epoch": 3.6885245901639343,
361
+ "grad_norm": 0.034276798367500305,
362
  "learning_rate": 5.84122359796067e-06,
363
+ "loss": 0.05244039058685303,
364
  "step": 1125
365
  },
366
  {
367
  "epoch": 3.7704918032786887,
368
+ "grad_norm": 10.901683807373047,
369
  "learning_rate": 5.477057538237437e-06,
370
+ "loss": 0.06656317710876465,
371
  "step": 1150
372
  },
373
  {
374
  "epoch": 3.8524590163934427,
375
+ "grad_norm": 2.3856894969940186,
376
  "learning_rate": 5.112891478514203e-06,
377
+ "loss": 0.06277508735656738,
378
  "step": 1175
379
  },
380
  {
381
  "epoch": 3.9344262295081966,
382
+ "grad_norm": 0.018699949607253075,
383
  "learning_rate": 4.748725418790969e-06,
384
+ "loss": 0.046858911514282224,
385
  "step": 1200
386
  },
387
  {
388
  "epoch": 4.0,
389
+ "eval_accuracy": 0.9662576687116564,
390
+ "eval_f1": 0.8047337278106509,
391
+ "eval_loss": 0.14547723531723022,
392
+ "eval_precision": 0.7555555555555555,
393
+ "eval_recall": 0.8607594936708861,
394
+ "eval_roc_auc": 0.9600822291998141,
395
+ "eval_runtime": 3.1883,
396
+ "eval_samples_per_second": 306.745,
397
+ "eval_steps_per_second": 9.723,
398
  "step": 1220
399
  }
400
  ],
 
410
  "early_stopping_threshold": 0.0
411
  },
412
  "attributes": {
413
+ "early_stopping_patience_counter": 1
414
  }
415
  },
416
  "TrainerControl": {
transformer/checkpoint-1525/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c80cdf8b98658539a77a69a282499e626f22060577eb079e6a055f09aba66066
3
  size 1112205008
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ca68ad0b5b62ce1f08ec00239ceda526ea353b1ddc553e305f4ceea6acda5317
3
  size 1112205008
transformer/checkpoint-1525/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:014bf9ccc323714518bfa10873ef36916a7df86a1608813c3700872b55906b0c
3
  size 2224532875
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:50962bf386796cce93bfd0be8d501de258304f3ad55fd24f22491068591bd9e2
3
  size 2224532875
transformer/checkpoint-1525/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2670ef1c6526aa1bf88e4427fd5e916ff5c25d9c98fc894c142c473952a06106
3
  size 1383
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6d3ceaf712c034a0e9722143d5622d35a083b5d5c1fc678fc7c4e4e70e581221
3
  size 1383
transformer/checkpoint-1525/trainer_state.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
- "best_global_step": 1220,
3
- "best_metric": 0.8414634146341463,
4
- "best_model_checkpoint": "/content/agri-utilization-classifier/transformer/checkpoint-1220",
5
  "epoch": 5.0,
6
  "eval_steps": 500,
7
  "global_step": 1525,
@@ -11,494 +11,494 @@
11
  "log_history": [
12
  {
13
  "epoch": 0.08196721311475409,
14
- "grad_norm": Infinity,
15
  "learning_rate": 3.157894736842105e-06,
16
- "loss": 0.7012384033203125,
17
  "step": 25
18
  },
19
  {
20
  "epoch": 0.16393442622950818,
21
- "grad_norm": 11.968428611755371,
22
  "learning_rate": 6.447368421052632e-06,
23
- "loss": 0.4254766845703125,
24
  "step": 50
25
  },
26
  {
27
  "epoch": 0.2459016393442623,
28
- "grad_norm": 19.943565368652344,
29
  "learning_rate": 9.736842105263159e-06,
30
- "loss": 0.3554811859130859,
31
  "step": 75
32
  },
33
  {
34
  "epoch": 0.32786885245901637,
35
- "grad_norm": 5.117671489715576,
36
  "learning_rate": 1.3026315789473684e-05,
37
- "loss": 0.3145046615600586,
38
  "step": 100
39
  },
40
  {
41
  "epoch": 0.4098360655737705,
42
- "grad_norm": 11.402164459228516,
43
  "learning_rate": 1.6315789473684213e-05,
44
- "loss": 0.2773847770690918,
45
  "step": 125
46
  },
47
  {
48
  "epoch": 0.4918032786885246,
49
- "grad_norm": 8.077093124389648,
50
  "learning_rate": 1.960526315789474e-05,
51
- "loss": 0.2556156539916992,
52
  "step": 150
53
  },
54
  {
55
  "epoch": 0.5737704918032787,
56
- "grad_norm": 2.0055508613586426,
57
  "learning_rate": 1.9679533867443555e-05,
58
- "loss": 0.25026893615722656,
59
  "step": 175
60
  },
61
  {
62
  "epoch": 0.6557377049180327,
63
- "grad_norm": 0.9293265342712402,
64
  "learning_rate": 1.9315367807720323e-05,
65
- "loss": 0.24691852569580078,
66
  "step": 200
67
  },
68
  {
69
  "epoch": 0.7377049180327869,
70
- "grad_norm": 1.4303064346313477,
71
  "learning_rate": 1.8951201747997088e-05,
72
- "loss": 0.21208112716674804,
73
  "step": 225
74
  },
75
  {
76
  "epoch": 0.819672131147541,
77
- "grad_norm": 4.035928249359131,
78
  "learning_rate": 1.8587035688273852e-05,
79
- "loss": 0.17017290115356445,
80
  "step": 250
81
  },
82
  {
83
  "epoch": 0.9016393442622951,
84
- "grad_norm": 4.241303443908691,
85
  "learning_rate": 1.822286962855062e-05,
86
- "loss": 0.16115386962890624,
87
  "step": 275
88
  },
89
  {
90
  "epoch": 0.9836065573770492,
91
- "grad_norm": 0.3990240693092346,
92
  "learning_rate": 1.7858703568827385e-05,
93
- "loss": 0.18343988418579102,
94
  "step": 300
95
  },
96
  {
97
  "epoch": 1.0,
98
- "eval_accuracy": 0.9642126789366053,
99
- "eval_f1": 0.7741935483870968,
100
- "eval_loss": 0.14546315371990204,
101
- "eval_precision": 0.7894736842105263,
102
- "eval_recall": 0.759493670886076,
103
- "eval_roc_auc": 0.9131946888948339,
104
- "eval_runtime": 3.5921,
105
- "eval_samples_per_second": 272.266,
106
- "eval_steps_per_second": 8.63,
107
  "step": 305
108
  },
109
  {
110
  "epoch": 1.0655737704918034,
111
- "grad_norm": 4.865096569061279,
112
  "learning_rate": 1.7494537509104153e-05,
113
- "loss": 0.19335979461669922,
114
  "step": 325
115
  },
116
  {
117
  "epoch": 1.1475409836065573,
118
- "grad_norm": 1.9780689477920532,
119
  "learning_rate": 1.7130371449380918e-05,
120
- "loss": 0.2344082260131836,
121
  "step": 350
122
  },
123
  {
124
  "epoch": 1.2295081967213115,
125
- "grad_norm": 1.3759413957595825,
126
  "learning_rate": 1.6766205389657686e-05,
127
- "loss": 0.20309404373168946,
128
  "step": 375
129
  },
130
  {
131
  "epoch": 1.3114754098360657,
132
- "grad_norm": 0.30811628699302673,
133
  "learning_rate": 1.640203932993445e-05,
134
- "loss": 0.224365234375,
135
  "step": 400
136
  },
137
  {
138
  "epoch": 1.3934426229508197,
139
- "grad_norm": 5.530014514923096,
140
  "learning_rate": 1.603787327021122e-05,
141
- "loss": 0.14759160995483397,
142
  "step": 425
143
  },
144
  {
145
  "epoch": 1.4754098360655736,
146
- "grad_norm": 3.7750189304351807,
147
  "learning_rate": 1.5673707210487983e-05,
148
- "loss": 0.14137668609619142,
149
  "step": 450
150
  },
151
  {
152
  "epoch": 1.5573770491803278,
153
- "grad_norm": 1.8209186792373657,
154
  "learning_rate": 1.530954115076475e-05,
155
- "loss": 0.19394855499267577,
156
  "step": 475
157
  },
158
  {
159
  "epoch": 1.639344262295082,
160
- "grad_norm": 0.4824683368206024,
161
  "learning_rate": 1.4945375091041516e-05,
162
- "loss": 0.1700056266784668,
163
  "step": 500
164
  },
165
  {
166
  "epoch": 1.721311475409836,
167
- "grad_norm": 0.5682937502861023,
168
  "learning_rate": 1.4581209031318282e-05,
169
- "loss": 0.17243267059326173,
170
  "step": 525
171
  },
172
  {
173
  "epoch": 1.8032786885245902,
174
- "grad_norm": 2.2086634635925293,
175
  "learning_rate": 1.4217042971595047e-05,
176
- "loss": 0.15430424690246583,
177
  "step": 550
178
  },
179
  {
180
  "epoch": 1.8852459016393444,
181
- "grad_norm": 6.93908166885376,
182
  "learning_rate": 1.3852876911871815e-05,
183
- "loss": 0.10752416610717773,
184
  "step": 575
185
  },
186
  {
187
  "epoch": 1.9672131147540983,
188
- "grad_norm": 5.092395782470703,
189
  "learning_rate": 1.3488710852148582e-05,
190
- "loss": 0.21721889495849608,
191
  "step": 600
192
  },
193
  {
194
  "epoch": 2.0,
195
- "eval_accuracy": 0.9570552147239264,
196
- "eval_f1": 0.7692307692307693,
197
- "eval_loss": 0.12331932783126831,
198
- "eval_precision": 0.6796116504854369,
199
- "eval_recall": 0.8860759493670886,
200
- "eval_roc_auc": 0.9679672209628138,
201
- "eval_runtime": 3.5595,
202
- "eval_samples_per_second": 274.754,
203
- "eval_steps_per_second": 8.709,
204
  "step": 610
205
  },
206
  {
207
  "epoch": 2.0491803278688523,
208
- "grad_norm": 12.040640830993652,
209
  "learning_rate": 1.3124544792425346e-05,
210
- "loss": 0.11267939567565918,
211
  "step": 625
212
  },
213
  {
214
  "epoch": 2.1311475409836067,
215
- "grad_norm": 0.33291733264923096,
216
  "learning_rate": 1.2760378732702113e-05,
217
- "loss": 0.16029356002807618,
218
  "step": 650
219
  },
220
  {
221
  "epoch": 2.2131147540983607,
222
- "grad_norm": 0.1562187671661377,
223
  "learning_rate": 1.239621267297888e-05,
224
- "loss": 0.13354766845703125,
225
  "step": 675
226
  },
227
  {
228
  "epoch": 2.2950819672131146,
229
- "grad_norm": 0.39492854475975037,
230
  "learning_rate": 1.2032046613255645e-05,
231
- "loss": 0.08748809814453125,
232
  "step": 700
233
  },
234
  {
235
  "epoch": 2.3770491803278686,
236
- "grad_norm": 0.22857463359832764,
237
  "learning_rate": 1.1667880553532412e-05,
238
- "loss": 0.1255797290802002,
239
  "step": 725
240
  },
241
  {
242
  "epoch": 2.459016393442623,
243
- "grad_norm": 42.23853302001953,
244
  "learning_rate": 1.1303714493809176e-05,
245
- "loss": 0.09398910522460938,
246
  "step": 750
247
  },
248
  {
249
  "epoch": 2.540983606557377,
250
- "grad_norm": 9.628519058227539,
251
  "learning_rate": 1.0939548434085944e-05,
252
- "loss": 0.12067486763000489,
253
  "step": 775
254
  },
255
  {
256
  "epoch": 2.6229508196721314,
257
- "grad_norm": 8.281865119934082,
258
  "learning_rate": 1.057538237436271e-05,
259
- "loss": 0.0960771656036377,
260
  "step": 800
261
  },
262
  {
263
  "epoch": 2.7049180327868854,
264
- "grad_norm": 0.2366073578596115,
265
  "learning_rate": 1.0211216314639475e-05,
266
- "loss": 0.1477354335784912,
267
  "step": 825
268
  },
269
  {
270
  "epoch": 2.7868852459016393,
271
- "grad_norm": 2.127614974975586,
272
  "learning_rate": 9.847050254916243e-06,
273
- "loss": 0.12143749237060547,
274
  "step": 850
275
  },
276
  {
277
  "epoch": 2.8688524590163933,
278
- "grad_norm": 0.1283058375120163,
279
  "learning_rate": 9.482884195193008e-06,
280
- "loss": 0.0978905963897705,
281
  "step": 875
282
  },
283
  {
284
  "epoch": 2.9508196721311473,
285
- "grad_norm": 0.16377978026866913,
286
  "learning_rate": 9.118718135469774e-06,
287
- "loss": 0.13501665115356445,
288
  "step": 900
289
  },
290
  {
291
  "epoch": 3.0,
292
- "eval_accuracy": 0.9683026584867076,
293
- "eval_f1": 0.8143712574850299,
294
- "eval_loss": 0.11399859189987183,
295
- "eval_precision": 0.7727272727272727,
296
- "eval_recall": 0.8607594936708861,
297
- "eval_roc_auc": 0.9685867560299067,
298
- "eval_runtime": 3.583,
299
- "eval_samples_per_second": 272.954,
300
- "eval_steps_per_second": 8.652,
301
  "step": 915
302
  },
303
  {
304
  "epoch": 3.0327868852459017,
305
- "grad_norm": 0.36435577273368835,
306
  "learning_rate": 8.754552075746541e-06,
307
- "loss": 0.12846416473388672,
308
  "step": 925
309
  },
310
  {
311
  "epoch": 3.1147540983606556,
312
- "grad_norm": 0.12181571871042252,
313
  "learning_rate": 8.390386016023307e-06,
314
- "loss": 0.07323605537414551,
315
  "step": 950
316
  },
317
  {
318
  "epoch": 3.19672131147541,
319
- "grad_norm": 0.08613187074661255,
320
  "learning_rate": 8.026219956300074e-06,
321
- "loss": 0.11347267150878906,
322
  "step": 975
323
  },
324
  {
325
  "epoch": 3.278688524590164,
326
- "grad_norm": 2.452489137649536,
327
  "learning_rate": 7.66205389657684e-06,
328
- "loss": 0.07726279258728028,
329
  "step": 1000
330
  },
331
  {
332
  "epoch": 3.360655737704918,
333
- "grad_norm": 0.05209459364414215,
334
  "learning_rate": 7.2978878368536055e-06,
335
- "loss": 0.07418290138244629,
336
  "step": 1025
337
  },
338
  {
339
  "epoch": 3.442622950819672,
340
- "grad_norm": 20.379858016967773,
341
  "learning_rate": 6.933721777130372e-06,
342
- "loss": 0.12044317245483399,
343
  "step": 1050
344
  },
345
  {
346
  "epoch": 3.5245901639344264,
347
- "grad_norm": 0.8267766237258911,
348
  "learning_rate": 6.569555717407138e-06,
349
- "loss": 0.07422394752502441,
350
  "step": 1075
351
  },
352
  {
353
  "epoch": 3.6065573770491803,
354
- "grad_norm": 0.18650104105472565,
355
  "learning_rate": 6.2053896576839045e-06,
356
- "loss": 0.07682370185852051,
357
  "step": 1100
358
  },
359
  {
360
  "epoch": 3.6885245901639343,
361
- "grad_norm": 0.07509063929319382,
362
  "learning_rate": 5.84122359796067e-06,
363
- "loss": 0.06737090110778808,
364
  "step": 1125
365
  },
366
  {
367
  "epoch": 3.7704918032786887,
368
- "grad_norm": 16.239089965820312,
369
  "learning_rate": 5.477057538237437e-06,
370
- "loss": 0.0853554630279541,
371
  "step": 1150
372
  },
373
  {
374
  "epoch": 3.8524590163934427,
375
- "grad_norm": 0.07212834060192108,
376
  "learning_rate": 5.112891478514203e-06,
377
- "loss": 0.07101381301879883,
378
  "step": 1175
379
  },
380
  {
381
  "epoch": 3.9344262295081966,
382
- "grad_norm": 0.05511339381337166,
383
  "learning_rate": 4.748725418790969e-06,
384
- "loss": 0.09712701797485351,
385
  "step": 1200
386
  },
387
  {
388
  "epoch": 4.0,
389
- "eval_accuracy": 0.9734151329243353,
390
- "eval_f1": 0.8414634146341463,
391
- "eval_loss": 0.11315910518169403,
392
- "eval_precision": 0.8117647058823529,
393
- "eval_recall": 0.8734177215189873,
394
- "eval_roc_auc": 0.9707692091071654,
395
- "eval_runtime": 3.7125,
396
- "eval_samples_per_second": 263.434,
397
- "eval_steps_per_second": 8.35,
398
  "step": 1220
399
  },
400
  {
401
  "epoch": 4.016393442622951,
402
- "grad_norm": 0.16843096911907196,
403
  "learning_rate": 4.3845593590677355e-06,
404
- "loss": 0.08288318634033204,
405
  "step": 1225
406
  },
407
  {
408
  "epoch": 4.098360655737705,
409
- "grad_norm": 0.12639367580413818,
410
  "learning_rate": 4.020393299344502e-06,
411
- "loss": 0.0468332052230835,
412
  "step": 1250
413
  },
414
  {
415
  "epoch": 4.180327868852459,
416
- "grad_norm": 0.11602156609296799,
417
  "learning_rate": 3.656227239621268e-06,
418
- "loss": 0.041537661552429196,
419
  "step": 1275
420
  },
421
  {
422
  "epoch": 4.262295081967213,
423
- "grad_norm": 0.046698153018951416,
424
  "learning_rate": 3.292061179898034e-06,
425
- "loss": 0.04678268432617187,
426
  "step": 1300
427
  },
428
  {
429
  "epoch": 4.344262295081967,
430
- "grad_norm": 0.05532635748386383,
431
  "learning_rate": 2.9278951201748e-06,
432
- "loss": 0.04036891937255859,
433
  "step": 1325
434
  },
435
  {
436
  "epoch": 4.426229508196721,
437
- "grad_norm": 0.1428351104259491,
438
  "learning_rate": 2.5637290604515665e-06,
439
- "loss": 0.037136049270629884,
440
  "step": 1350
441
  },
442
  {
443
  "epoch": 4.508196721311475,
444
- "grad_norm": 0.11422587931156158,
445
  "learning_rate": 2.1995630007283324e-06,
446
- "loss": 0.09556760787963867,
447
  "step": 1375
448
  },
449
  {
450
  "epoch": 4.590163934426229,
451
- "grad_norm": 0.10767149180173874,
452
  "learning_rate": 1.8353969410050983e-06,
453
- "loss": 0.07056922912597656,
454
  "step": 1400
455
  },
456
  {
457
  "epoch": 4.672131147540983,
458
- "grad_norm": 7.874922752380371,
459
  "learning_rate": 1.4712308812818645e-06,
460
- "loss": 0.0239799165725708,
461
  "step": 1425
462
  },
463
  {
464
  "epoch": 4.754098360655737,
465
- "grad_norm": 0.9701394438743591,
466
  "learning_rate": 1.1070648215586309e-06,
467
- "loss": 0.06149462699890137,
468
  "step": 1450
469
  },
470
  {
471
  "epoch": 4.836065573770492,
472
- "grad_norm": 0.05171401798725128,
473
  "learning_rate": 7.428987618353969e-07,
474
- "loss": 0.038357572555541994,
475
  "step": 1475
476
  },
477
  {
478
  "epoch": 4.918032786885246,
479
- "grad_norm": 0.05953866243362427,
480
  "learning_rate": 3.787327021121632e-07,
481
- "loss": 0.12664172172546387,
482
  "step": 1500
483
  },
484
  {
485
  "epoch": 5.0,
486
- "grad_norm": 0.10173187404870987,
487
  "learning_rate": 1.4566642388929353e-08,
488
- "loss": 0.1284790515899658,
489
  "step": 1525
490
  },
491
  {
492
  "epoch": 5.0,
493
- "eval_accuracy": 0.9713701431492843,
494
- "eval_f1": 0.8313253012048193,
495
- "eval_loss": 0.1342686116695404,
496
- "eval_precision": 0.7931034482758621,
497
- "eval_recall": 0.8734177215189873,
498
- "eval_roc_auc": 0.9718111544472763,
499
- "eval_runtime": 3.5346,
500
- "eval_samples_per_second": 276.69,
501
- "eval_steps_per_second": 8.77,
502
  "step": 1525
503
  }
504
  ],
@@ -514,7 +514,7 @@
514
  "early_stopping_threshold": 0.0
515
  },
516
  "attributes": {
517
- "early_stopping_patience_counter": 1
518
  }
519
  },
520
  "TrainerControl": {
 
1
  {
2
+ "best_global_step": 915,
3
+ "best_metric": 0.8220858895705522,
4
+ "best_model_checkpoint": "/content/agri-utilization-classifier/transformer/checkpoint-915",
5
  "epoch": 5.0,
6
  "eval_steps": 500,
7
  "global_step": 1525,
 
11
  "log_history": [
12
  {
13
  "epoch": 0.08196721311475409,
14
+ "grad_norm": 6.055062770843506,
15
  "learning_rate": 3.157894736842105e-06,
16
+ "loss": 0.62972900390625,
17
  "step": 25
18
  },
19
  {
20
  "epoch": 0.16393442622950818,
21
+ "grad_norm": 10.6914701461792,
22
  "learning_rate": 6.447368421052632e-06,
23
+ "loss": 0.44850738525390627,
24
  "step": 50
25
  },
26
  {
27
  "epoch": 0.2459016393442623,
28
+ "grad_norm": 6.670228481292725,
29
  "learning_rate": 9.736842105263159e-06,
30
+ "loss": 0.3566379165649414,
31
  "step": 75
32
  },
33
  {
34
  "epoch": 0.32786885245901637,
35
+ "grad_norm": 2.589911937713623,
36
  "learning_rate": 1.3026315789473684e-05,
37
+ "loss": 0.2718839645385742,
38
  "step": 100
39
  },
40
  {
41
  "epoch": 0.4098360655737705,
42
+ "grad_norm": 22.02676773071289,
43
  "learning_rate": 1.6315789473684213e-05,
44
+ "loss": 0.1922766876220703,
45
  "step": 125
46
  },
47
  {
48
  "epoch": 0.4918032786885246,
49
+ "grad_norm": 2.6362855434417725,
50
  "learning_rate": 1.960526315789474e-05,
51
+ "loss": 0.1837622833251953,
52
  "step": 150
53
  },
54
  {
55
  "epoch": 0.5737704918032787,
56
+ "grad_norm": 3.478484630584717,
57
  "learning_rate": 1.9679533867443555e-05,
58
+ "loss": 0.18766048431396484,
59
  "step": 175
60
  },
61
  {
62
  "epoch": 0.6557377049180327,
63
+ "grad_norm": 8.077605247497559,
64
  "learning_rate": 1.9315367807720323e-05,
65
+ "loss": 0.23830581665039063,
66
  "step": 200
67
  },
68
  {
69
  "epoch": 0.7377049180327869,
70
+ "grad_norm": 0.7427046298980713,
71
  "learning_rate": 1.8951201747997088e-05,
72
+ "loss": 0.30742517471313474,
73
  "step": 225
74
  },
75
  {
76
  "epoch": 0.819672131147541,
77
+ "grad_norm": 36.34975051879883,
78
  "learning_rate": 1.8587035688273852e-05,
79
+ "loss": 0.22336017608642578,
80
  "step": 250
81
  },
82
  {
83
  "epoch": 0.9016393442622951,
84
+ "grad_norm": 5.215510845184326,
85
  "learning_rate": 1.822286962855062e-05,
86
+ "loss": 0.13779294967651368,
87
  "step": 275
88
  },
89
  {
90
  "epoch": 0.9836065573770492,
91
+ "grad_norm": 3.551121950149536,
92
  "learning_rate": 1.7858703568827385e-05,
93
+ "loss": 0.19200111389160157,
94
  "step": 300
95
  },
96
  {
97
  "epoch": 1.0,
98
+ "eval_accuracy": 0.9631901840490797,
99
+ "eval_f1": 0.7721518987341772,
100
+ "eval_loss": 0.1292734444141388,
101
+ "eval_precision": 0.7721518987341772,
102
+ "eval_recall": 0.7721518987341772,
103
+ "eval_roc_auc": 0.9563720589684741,
104
+ "eval_runtime": 3.3396,
105
+ "eval_samples_per_second": 292.853,
106
+ "eval_steps_per_second": 9.283,
107
  "step": 305
108
  },
109
  {
110
  "epoch": 1.0655737704918034,
111
+ "grad_norm": 0.5402449369430542,
112
  "learning_rate": 1.7494537509104153e-05,
113
+ "loss": 0.1241053295135498,
114
  "step": 325
115
  },
116
  {
117
  "epoch": 1.1475409836065573,
118
+ "grad_norm": 4.476892948150635,
119
  "learning_rate": 1.7130371449380918e-05,
120
+ "loss": 0.20724605560302733,
121
  "step": 350
122
  },
123
  {
124
  "epoch": 1.2295081967213115,
125
+ "grad_norm": 0.46729782223701477,
126
  "learning_rate": 1.6766205389657686e-05,
127
+ "loss": 0.13567353248596192,
128
  "step": 375
129
  },
130
  {
131
  "epoch": 1.3114754098360657,
132
+ "grad_norm": 0.1852118819952011,
133
  "learning_rate": 1.640203932993445e-05,
134
+ "loss": 0.13295170783996582,
135
  "step": 400
136
  },
137
  {
138
  "epoch": 1.3934426229508197,
139
+ "grad_norm": 1.2681413888931274,
140
  "learning_rate": 1.603787327021122e-05,
141
+ "loss": 0.2027936363220215,
142
  "step": 425
143
  },
144
  {
145
  "epoch": 1.4754098360655736,
146
+ "grad_norm": 7.484091281890869,
147
  "learning_rate": 1.5673707210487983e-05,
148
+ "loss": 0.12364128112792969,
149
  "step": 450
150
  },
151
  {
152
  "epoch": 1.5573770491803278,
153
+ "grad_norm": 0.46489500999450684,
154
  "learning_rate": 1.530954115076475e-05,
155
+ "loss": 0.14407362937927246,
156
  "step": 475
157
  },
158
  {
159
  "epoch": 1.639344262295082,
160
+ "grad_norm": 0.20967872440814972,
161
  "learning_rate": 1.4945375091041516e-05,
162
+ "loss": 0.12458925247192383,
163
  "step": 500
164
  },
165
  {
166
  "epoch": 1.721311475409836,
167
+ "grad_norm": 0.1643747240304947,
168
  "learning_rate": 1.4581209031318282e-05,
169
+ "loss": 0.21631996154785157,
170
  "step": 525
171
  },
172
  {
173
  "epoch": 1.8032786885245902,
174
+ "grad_norm": 7.073329448699951,
175
  "learning_rate": 1.4217042971595047e-05,
176
+ "loss": 0.16043865203857421,
177
  "step": 550
178
  },
179
  {
180
  "epoch": 1.8852459016393444,
181
+ "grad_norm": 1.744958758354187,
182
  "learning_rate": 1.3852876911871815e-05,
183
+ "loss": 0.0966644287109375,
184
  "step": 575
185
  },
186
  {
187
  "epoch": 1.9672131147540983,
188
+ "grad_norm": 12.79035472869873,
189
  "learning_rate": 1.3488710852148582e-05,
190
+ "loss": 0.15884541511535644,
191
  "step": 600
192
  },
193
  {
194
  "epoch": 2.0,
195
+ "eval_accuracy": 0.9611451942740287,
196
+ "eval_f1": 0.7432432432432432,
197
+ "eval_loss": 0.13287827372550964,
198
+ "eval_precision": 0.7971014492753623,
199
+ "eval_recall": 0.6962025316455697,
200
+ "eval_roc_auc": 0.9594697343039381,
201
+ "eval_runtime": 3.2739,
202
+ "eval_samples_per_second": 298.727,
203
+ "eval_steps_per_second": 9.469,
204
  "step": 610
205
  },
206
  {
207
  "epoch": 2.0491803278688523,
208
+ "grad_norm": 17.520444869995117,
209
  "learning_rate": 1.3124544792425346e-05,
210
+ "loss": 0.08896012306213379,
211
  "step": 625
212
  },
213
  {
214
  "epoch": 2.1311475409836067,
215
+ "grad_norm": 0.16623224318027496,
216
  "learning_rate": 1.2760378732702113e-05,
217
+ "loss": 0.11752216339111328,
218
  "step": 650
219
  },
220
  {
221
  "epoch": 2.2131147540983607,
222
+ "grad_norm": 0.20762814581394196,
223
  "learning_rate": 1.239621267297888e-05,
224
+ "loss": 0.1193038272857666,
225
  "step": 675
226
  },
227
  {
228
  "epoch": 2.2950819672131146,
229
+ "grad_norm": 0.1500111073255539,
230
  "learning_rate": 1.2032046613255645e-05,
231
+ "loss": 0.0630855655670166,
232
  "step": 700
233
  },
234
  {
235
  "epoch": 2.3770491803278686,
236
+ "grad_norm": 0.17727839946746826,
237
  "learning_rate": 1.1667880553532412e-05,
238
+ "loss": 0.08730959892272949,
239
  "step": 725
240
  },
241
  {
242
  "epoch": 2.459016393442623,
243
+ "grad_norm": 4.3997321128845215,
244
  "learning_rate": 1.1303714493809176e-05,
245
+ "loss": 0.12114215850830078,
246
  "step": 750
247
  },
248
  {
249
  "epoch": 2.540983606557377,
250
+ "grad_norm": 34.47224044799805,
251
  "learning_rate": 1.0939548434085944e-05,
252
+ "loss": 0.11070786476135254,
253
  "step": 775
254
  },
255
  {
256
  "epoch": 2.6229508196721314,
257
+ "grad_norm": 25.977081298828125,
258
  "learning_rate": 1.057538237436271e-05,
259
+ "loss": 0.10845686912536621,
260
  "step": 800
261
  },
262
  {
263
  "epoch": 2.7049180327868854,
264
+ "grad_norm": 0.1657736450433731,
265
  "learning_rate": 1.0211216314639475e-05,
266
+ "loss": 0.1025285530090332,
267
  "step": 825
268
  },
269
  {
270
  "epoch": 2.7868852459016393,
271
+ "grad_norm": 34.05498504638672,
272
  "learning_rate": 9.847050254916243e-06,
273
+ "loss": 0.07825160026550293,
274
  "step": 850
275
  },
276
  {
277
  "epoch": 2.8688524590163933,
278
+ "grad_norm": 0.2868161201477051,
279
  "learning_rate": 9.482884195193008e-06,
280
+ "loss": 0.12041816711425782,
281
  "step": 875
282
  },
283
  {
284
  "epoch": 2.9508196721311473,
285
+ "grad_norm": 0.19192977249622345,
286
  "learning_rate": 9.118718135469774e-06,
287
+ "loss": 0.08709416389465333,
288
  "step": 900
289
  },
290
  {
291
  "epoch": 3.0,
292
+ "eval_accuracy": 0.9703476482617587,
293
+ "eval_f1": 0.8220858895705522,
294
+ "eval_loss": 0.11163181066513062,
295
+ "eval_precision": 0.7976190476190477,
296
+ "eval_recall": 0.8481012658227848,
297
+ "eval_roc_auc": 0.9661086157615353,
298
+ "eval_runtime": 3.1733,
299
+ "eval_samples_per_second": 308.193,
300
+ "eval_steps_per_second": 9.769,
301
  "step": 915
302
  },
303
  {
304
  "epoch": 3.0327868852459017,
305
+ "grad_norm": 1.0706992149353027,
306
  "learning_rate": 8.754552075746541e-06,
307
+ "loss": 0.10751664161682128,
308
  "step": 925
309
  },
310
  {
311
  "epoch": 3.1147540983606556,
312
+ "grad_norm": 0.12844231724739075,
313
  "learning_rate": 8.390386016023307e-06,
314
+ "loss": 0.06818144798278808,
315
  "step": 950
316
  },
317
  {
318
  "epoch": 3.19672131147541,
319
+ "grad_norm": 0.07692205160856247,
320
  "learning_rate": 8.026219956300074e-06,
321
+ "loss": 0.12229555130004882,
322
  "step": 975
323
  },
324
  {
325
  "epoch": 3.278688524590164,
326
+ "grad_norm": 1.773990511894226,
327
  "learning_rate": 7.66205389657684e-06,
328
+ "loss": 0.06936595916748046,
329
  "step": 1000
330
  },
331
  {
332
  "epoch": 3.360655737704918,
333
+ "grad_norm": 0.07844381034374237,
334
  "learning_rate": 7.2978878368536055e-06,
335
+ "loss": 0.05219663143157959,
336
  "step": 1025
337
  },
338
  {
339
  "epoch": 3.442622950819672,
340
+ "grad_norm": 12.502548217773438,
341
  "learning_rate": 6.933721777130372e-06,
342
+ "loss": 0.06849228858947753,
343
  "step": 1050
344
  },
345
  {
346
  "epoch": 3.5245901639344264,
347
+ "grad_norm": 1.6993861198425293,
348
  "learning_rate": 6.569555717407138e-06,
349
+ "loss": 0.08783550262451172,
350
  "step": 1075
351
  },
352
  {
353
  "epoch": 3.6065573770491803,
354
+ "grad_norm": 0.06551510095596313,
355
  "learning_rate": 6.2053896576839045e-06,
356
+ "loss": 0.049420347213745115,
357
  "step": 1100
358
  },
359
  {
360
  "epoch": 3.6885245901639343,
361
+ "grad_norm": 0.034276798367500305,
362
  "learning_rate": 5.84122359796067e-06,
363
+ "loss": 0.05244039058685303,
364
  "step": 1125
365
  },
366
  {
367
  "epoch": 3.7704918032786887,
368
+ "grad_norm": 10.901683807373047,
369
  "learning_rate": 5.477057538237437e-06,
370
+ "loss": 0.06656317710876465,
371
  "step": 1150
372
  },
373
  {
374
  "epoch": 3.8524590163934427,
375
+ "grad_norm": 2.3856894969940186,
376
  "learning_rate": 5.112891478514203e-06,
377
+ "loss": 0.06277508735656738,
378
  "step": 1175
379
  },
380
  {
381
  "epoch": 3.9344262295081966,
382
+ "grad_norm": 0.018699949607253075,
383
  "learning_rate": 4.748725418790969e-06,
384
+ "loss": 0.046858911514282224,
385
  "step": 1200
386
  },
387
  {
388
  "epoch": 4.0,
389
+ "eval_accuracy": 0.9662576687116564,
390
+ "eval_f1": 0.8047337278106509,
391
+ "eval_loss": 0.14547723531723022,
392
+ "eval_precision": 0.7555555555555555,
393
+ "eval_recall": 0.8607594936708861,
394
+ "eval_roc_auc": 0.9600822291998141,
395
+ "eval_runtime": 3.1883,
396
+ "eval_samples_per_second": 306.745,
397
+ "eval_steps_per_second": 9.723,
398
  "step": 1220
399
  },
400
  {
401
  "epoch": 4.016393442622951,
402
+ "grad_norm": 2.7085537910461426,
403
  "learning_rate": 4.3845593590677355e-06,
404
+ "loss": 0.0630385398864746,
405
  "step": 1225
406
  },
407
  {
408
  "epoch": 4.098360655737705,
409
+ "grad_norm": 45.71488952636719,
410
  "learning_rate": 4.020393299344502e-06,
411
+ "loss": 0.03476689338684082,
412
  "step": 1250
413
  },
414
  {
415
  "epoch": 4.180327868852459,
416
+ "grad_norm": 0.14275555312633514,
417
  "learning_rate": 3.656227239621268e-06,
418
+ "loss": 0.04420119285583496,
419
  "step": 1275
420
  },
421
  {
422
  "epoch": 4.262295081967213,
423
+ "grad_norm": 0.03228295221924782,
424
  "learning_rate": 3.292061179898034e-06,
425
+ "loss": 0.0440864372253418,
426
  "step": 1300
427
  },
428
  {
429
  "epoch": 4.344262295081967,
430
+ "grad_norm": 0.019369477406144142,
431
  "learning_rate": 2.9278951201748e-06,
432
+ "loss": 0.035089619159698486,
433
  "step": 1325
434
  },
435
  {
436
  "epoch": 4.426229508196721,
437
+ "grad_norm": 0.08530243486166,
438
  "learning_rate": 2.5637290604515665e-06,
439
+ "loss": 0.04550007343292237,
440
  "step": 1350
441
  },
442
  {
443
  "epoch": 4.508196721311475,
444
+ "grad_norm": 0.037266574800014496,
445
  "learning_rate": 2.1995630007283324e-06,
446
+ "loss": 0.059287338256835936,
447
  "step": 1375
448
  },
449
  {
450
  "epoch": 4.590163934426229,
451
+ "grad_norm": 0.07883958518505096,
452
  "learning_rate": 1.8353969410050983e-06,
453
+ "loss": 0.041749396324157716,
454
  "step": 1400
455
  },
456
  {
457
  "epoch": 4.672131147540983,
458
+ "grad_norm": 11.786356925964355,
459
  "learning_rate": 1.4712308812818645e-06,
460
+ "loss": 0.046635646820068356,
461
  "step": 1425
462
  },
463
  {
464
  "epoch": 4.754098360655737,
465
+ "grad_norm": 3.003070831298828,
466
  "learning_rate": 1.1070648215586309e-06,
467
+ "loss": 0.07572799682617187,
468
  "step": 1450
469
  },
470
  {
471
  "epoch": 4.836065573770492,
472
+ "grad_norm": 0.07770609855651855,
473
  "learning_rate": 7.428987618353969e-07,
474
+ "loss": 0.01340787172317505,
475
  "step": 1475
476
  },
477
  {
478
  "epoch": 4.918032786885246,
479
+ "grad_norm": 0.022642159834504128,
480
  "learning_rate": 3.787327021121632e-07,
481
+ "loss": 0.05259611129760742,
482
  "step": 1500
483
  },
484
  {
485
  "epoch": 5.0,
486
+ "grad_norm": 0.03238137811422348,
487
  "learning_rate": 1.4566642388929353e-08,
488
+ "loss": 0.058229475021362304,
489
  "step": 1525
490
  },
491
  {
492
  "epoch": 5.0,
493
+ "eval_accuracy": 0.9703476482617587,
494
+ "eval_f1": 0.8220858895705522,
495
+ "eval_loss": 0.1447911262512207,
496
+ "eval_precision": 0.7976190476190477,
497
+ "eval_recall": 0.8481012658227848,
498
+ "eval_roc_auc": 0.9660241337069317,
499
+ "eval_runtime": 3.4435,
500
+ "eval_samples_per_second": 284.012,
501
+ "eval_steps_per_second": 9.002,
502
  "step": 1525
503
  }
504
  ],
 
514
  "early_stopping_threshold": 0.0
515
  },
516
  "attributes": {
517
+ "early_stopping_patience_counter": 2
518
  }
519
  },
520
  "TrainerControl": {
transformer/checkpoint-305/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:dc535e56030653cadcb705a1e64ce0106ee66296624c8cf834067d6d2304bad5
3
  size 1112205008
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1873e7a4c6babfc7c2968fb8d3cedcd6f4aef898980615480787cebcf2a5dfd8
3
  size 1112205008
transformer/checkpoint-305/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:69cab210a194b5994a26d901bb7bc3744ea997681fc2128ced977095df0f3d95
3
  size 2224532875
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:949dd09537449c52151556c073e03cd92355984ac38ba4355f4be6a7c633b13e
3
  size 2224532875
transformer/checkpoint-305/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d4804ba23baab7f91aff02de6948cc425203dfa59580fa70cc5f769dc34b74cb
3
  size 1383
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5b7065a647ca661fe79bdd011de3c790f0dbd92072446af4dc14ee2adc84bb56
3
  size 1383
transformer/checkpoint-305/trainer_state.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
  "best_global_step": 305,
3
- "best_metric": 0.7741935483870968,
4
  "best_model_checkpoint": "/content/agri-utilization-classifier/transformer/checkpoint-305",
5
  "epoch": 1.0,
6
  "eval_steps": 500,
@@ -11,99 +11,99 @@
11
  "log_history": [
12
  {
13
  "epoch": 0.08196721311475409,
14
- "grad_norm": Infinity,
15
  "learning_rate": 3.157894736842105e-06,
16
- "loss": 0.7012384033203125,
17
  "step": 25
18
  },
19
  {
20
  "epoch": 0.16393442622950818,
21
- "grad_norm": 11.968428611755371,
22
  "learning_rate": 6.447368421052632e-06,
23
- "loss": 0.4254766845703125,
24
  "step": 50
25
  },
26
  {
27
  "epoch": 0.2459016393442623,
28
- "grad_norm": 19.943565368652344,
29
  "learning_rate": 9.736842105263159e-06,
30
- "loss": 0.3554811859130859,
31
  "step": 75
32
  },
33
  {
34
  "epoch": 0.32786885245901637,
35
- "grad_norm": 5.117671489715576,
36
  "learning_rate": 1.3026315789473684e-05,
37
- "loss": 0.3145046615600586,
38
  "step": 100
39
  },
40
  {
41
  "epoch": 0.4098360655737705,
42
- "grad_norm": 11.402164459228516,
43
  "learning_rate": 1.6315789473684213e-05,
44
- "loss": 0.2773847770690918,
45
  "step": 125
46
  },
47
  {
48
  "epoch": 0.4918032786885246,
49
- "grad_norm": 8.077093124389648,
50
  "learning_rate": 1.960526315789474e-05,
51
- "loss": 0.2556156539916992,
52
  "step": 150
53
  },
54
  {
55
  "epoch": 0.5737704918032787,
56
- "grad_norm": 2.0055508613586426,
57
  "learning_rate": 1.9679533867443555e-05,
58
- "loss": 0.25026893615722656,
59
  "step": 175
60
  },
61
  {
62
  "epoch": 0.6557377049180327,
63
- "grad_norm": 0.9293265342712402,
64
  "learning_rate": 1.9315367807720323e-05,
65
- "loss": 0.24691852569580078,
66
  "step": 200
67
  },
68
  {
69
  "epoch": 0.7377049180327869,
70
- "grad_norm": 1.4303064346313477,
71
  "learning_rate": 1.8951201747997088e-05,
72
- "loss": 0.21208112716674804,
73
  "step": 225
74
  },
75
  {
76
  "epoch": 0.819672131147541,
77
- "grad_norm": 4.035928249359131,
78
  "learning_rate": 1.8587035688273852e-05,
79
- "loss": 0.17017290115356445,
80
  "step": 250
81
  },
82
  {
83
  "epoch": 0.9016393442622951,
84
- "grad_norm": 4.241303443908691,
85
  "learning_rate": 1.822286962855062e-05,
86
- "loss": 0.16115386962890624,
87
  "step": 275
88
  },
89
  {
90
  "epoch": 0.9836065573770492,
91
- "grad_norm": 0.3990240693092346,
92
  "learning_rate": 1.7858703568827385e-05,
93
- "loss": 0.18343988418579102,
94
  "step": 300
95
  },
96
  {
97
  "epoch": 1.0,
98
- "eval_accuracy": 0.9642126789366053,
99
- "eval_f1": 0.7741935483870968,
100
- "eval_loss": 0.14546315371990204,
101
- "eval_precision": 0.7894736842105263,
102
- "eval_recall": 0.759493670886076,
103
- "eval_roc_auc": 0.9131946888948339,
104
- "eval_runtime": 3.5921,
105
- "eval_samples_per_second": 272.266,
106
- "eval_steps_per_second": 8.63,
107
  "step": 305
108
  }
109
  ],
 
1
  {
2
  "best_global_step": 305,
3
+ "best_metric": 0.7721518987341772,
4
  "best_model_checkpoint": "/content/agri-utilization-classifier/transformer/checkpoint-305",
5
  "epoch": 1.0,
6
  "eval_steps": 500,
 
11
  "log_history": [
12
  {
13
  "epoch": 0.08196721311475409,
14
+ "grad_norm": 6.055062770843506,
15
  "learning_rate": 3.157894736842105e-06,
16
+ "loss": 0.62972900390625,
17
  "step": 25
18
  },
19
  {
20
  "epoch": 0.16393442622950818,
21
+ "grad_norm": 10.6914701461792,
22
  "learning_rate": 6.447368421052632e-06,
23
+ "loss": 0.44850738525390627,
24
  "step": 50
25
  },
26
  {
27
  "epoch": 0.2459016393442623,
28
+ "grad_norm": 6.670228481292725,
29
  "learning_rate": 9.736842105263159e-06,
30
+ "loss": 0.3566379165649414,
31
  "step": 75
32
  },
33
  {
34
  "epoch": 0.32786885245901637,
35
+ "grad_norm": 2.589911937713623,
36
  "learning_rate": 1.3026315789473684e-05,
37
+ "loss": 0.2718839645385742,
38
  "step": 100
39
  },
40
  {
41
  "epoch": 0.4098360655737705,
42
+ "grad_norm": 22.02676773071289,
43
  "learning_rate": 1.6315789473684213e-05,
44
+ "loss": 0.1922766876220703,
45
  "step": 125
46
  },
47
  {
48
  "epoch": 0.4918032786885246,
49
+ "grad_norm": 2.6362855434417725,
50
  "learning_rate": 1.960526315789474e-05,
51
+ "loss": 0.1837622833251953,
52
  "step": 150
53
  },
54
  {
55
  "epoch": 0.5737704918032787,
56
+ "grad_norm": 3.478484630584717,
57
  "learning_rate": 1.9679533867443555e-05,
58
+ "loss": 0.18766048431396484,
59
  "step": 175
60
  },
61
  {
62
  "epoch": 0.6557377049180327,
63
+ "grad_norm": 8.077605247497559,
64
  "learning_rate": 1.9315367807720323e-05,
65
+ "loss": 0.23830581665039063,
66
  "step": 200
67
  },
68
  {
69
  "epoch": 0.7377049180327869,
70
+ "grad_norm": 0.7427046298980713,
71
  "learning_rate": 1.8951201747997088e-05,
72
+ "loss": 0.30742517471313474,
73
  "step": 225
74
  },
75
  {
76
  "epoch": 0.819672131147541,
77
+ "grad_norm": 36.34975051879883,
78
  "learning_rate": 1.8587035688273852e-05,
79
+ "loss": 0.22336017608642578,
80
  "step": 250
81
  },
82
  {
83
  "epoch": 0.9016393442622951,
84
+ "grad_norm": 5.215510845184326,
85
  "learning_rate": 1.822286962855062e-05,
86
+ "loss": 0.13779294967651368,
87
  "step": 275
88
  },
89
  {
90
  "epoch": 0.9836065573770492,
91
+ "grad_norm": 3.551121950149536,
92
  "learning_rate": 1.7858703568827385e-05,
93
+ "loss": 0.19200111389160157,
94
  "step": 300
95
  },
96
  {
97
  "epoch": 1.0,
98
+ "eval_accuracy": 0.9631901840490797,
99
+ "eval_f1": 0.7721518987341772,
100
+ "eval_loss": 0.1292734444141388,
101
+ "eval_precision": 0.7721518987341772,
102
+ "eval_recall": 0.7721518987341772,
103
+ "eval_roc_auc": 0.9563720589684741,
104
+ "eval_runtime": 3.3396,
105
+ "eval_samples_per_second": 292.853,
106
+ "eval_steps_per_second": 9.283,
107
  "step": 305
108
  }
109
  ],
transformer/checkpoint-610/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7732d8cc10495269abb42e8c1bbdcf1de4f88f55ef74140711a8f112b4bf271e
3
  size 1112205008
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8c2e8002a8b39d6b2b729d256b3d4cff3d522204ecb453b2bd5c433f9bd4944f
3
  size 1112205008
transformer/checkpoint-610/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:14f0aa6126dd6cda43941781c125a8571de85d7824445cbdb13fba8b9ae327e9
3
  size 2224532875
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:36fd23804b528193a6fc5999a821ef8809fb31a6efd5c61fe763007795ad7dff
3
  size 2224532875
transformer/checkpoint-610/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e3c3655285f7ee33ffd38e6285ffefb860743212f30b81fe2645995678e63f71
3
  size 1383
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:50d9d499a5525a1f496c3b9a272dbba833f43becb5d780497724ade85d68372c
3
  size 1383
transformer/checkpoint-610/trainer_state.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
  "best_global_step": 305,
3
- "best_metric": 0.7741935483870968,
4
  "best_model_checkpoint": "/content/agri-utilization-classifier/transformer/checkpoint-305",
5
  "epoch": 2.0,
6
  "eval_steps": 500,
@@ -11,196 +11,196 @@
11
  "log_history": [
12
  {
13
  "epoch": 0.08196721311475409,
14
- "grad_norm": Infinity,
15
  "learning_rate": 3.157894736842105e-06,
16
- "loss": 0.7012384033203125,
17
  "step": 25
18
  },
19
  {
20
  "epoch": 0.16393442622950818,
21
- "grad_norm": 11.968428611755371,
22
  "learning_rate": 6.447368421052632e-06,
23
- "loss": 0.4254766845703125,
24
  "step": 50
25
  },
26
  {
27
  "epoch": 0.2459016393442623,
28
- "grad_norm": 19.943565368652344,
29
  "learning_rate": 9.736842105263159e-06,
30
- "loss": 0.3554811859130859,
31
  "step": 75
32
  },
33
  {
34
  "epoch": 0.32786885245901637,
35
- "grad_norm": 5.117671489715576,
36
  "learning_rate": 1.3026315789473684e-05,
37
- "loss": 0.3145046615600586,
38
  "step": 100
39
  },
40
  {
41
  "epoch": 0.4098360655737705,
42
- "grad_norm": 11.402164459228516,
43
  "learning_rate": 1.6315789473684213e-05,
44
- "loss": 0.2773847770690918,
45
  "step": 125
46
  },
47
  {
48
  "epoch": 0.4918032786885246,
49
- "grad_norm": 8.077093124389648,
50
  "learning_rate": 1.960526315789474e-05,
51
- "loss": 0.2556156539916992,
52
  "step": 150
53
  },
54
  {
55
  "epoch": 0.5737704918032787,
56
- "grad_norm": 2.0055508613586426,
57
  "learning_rate": 1.9679533867443555e-05,
58
- "loss": 0.25026893615722656,
59
  "step": 175
60
  },
61
  {
62
  "epoch": 0.6557377049180327,
63
- "grad_norm": 0.9293265342712402,
64
  "learning_rate": 1.9315367807720323e-05,
65
- "loss": 0.24691852569580078,
66
  "step": 200
67
  },
68
  {
69
  "epoch": 0.7377049180327869,
70
- "grad_norm": 1.4303064346313477,
71
  "learning_rate": 1.8951201747997088e-05,
72
- "loss": 0.21208112716674804,
73
  "step": 225
74
  },
75
  {
76
  "epoch": 0.819672131147541,
77
- "grad_norm": 4.035928249359131,
78
  "learning_rate": 1.8587035688273852e-05,
79
- "loss": 0.17017290115356445,
80
  "step": 250
81
  },
82
  {
83
  "epoch": 0.9016393442622951,
84
- "grad_norm": 4.241303443908691,
85
  "learning_rate": 1.822286962855062e-05,
86
- "loss": 0.16115386962890624,
87
  "step": 275
88
  },
89
  {
90
  "epoch": 0.9836065573770492,
91
- "grad_norm": 0.3990240693092346,
92
  "learning_rate": 1.7858703568827385e-05,
93
- "loss": 0.18343988418579102,
94
  "step": 300
95
  },
96
  {
97
  "epoch": 1.0,
98
- "eval_accuracy": 0.9642126789366053,
99
- "eval_f1": 0.7741935483870968,
100
- "eval_loss": 0.14546315371990204,
101
- "eval_precision": 0.7894736842105263,
102
- "eval_recall": 0.759493670886076,
103
- "eval_roc_auc": 0.9131946888948339,
104
- "eval_runtime": 3.5921,
105
- "eval_samples_per_second": 272.266,
106
- "eval_steps_per_second": 8.63,
107
  "step": 305
108
  },
109
  {
110
  "epoch": 1.0655737704918034,
111
- "grad_norm": 4.865096569061279,
112
  "learning_rate": 1.7494537509104153e-05,
113
- "loss": 0.19335979461669922,
114
  "step": 325
115
  },
116
  {
117
  "epoch": 1.1475409836065573,
118
- "grad_norm": 1.9780689477920532,
119
  "learning_rate": 1.7130371449380918e-05,
120
- "loss": 0.2344082260131836,
121
  "step": 350
122
  },
123
  {
124
  "epoch": 1.2295081967213115,
125
- "grad_norm": 1.3759413957595825,
126
  "learning_rate": 1.6766205389657686e-05,
127
- "loss": 0.20309404373168946,
128
  "step": 375
129
  },
130
  {
131
  "epoch": 1.3114754098360657,
132
- "grad_norm": 0.30811628699302673,
133
  "learning_rate": 1.640203932993445e-05,
134
- "loss": 0.224365234375,
135
  "step": 400
136
  },
137
  {
138
  "epoch": 1.3934426229508197,
139
- "grad_norm": 5.530014514923096,
140
  "learning_rate": 1.603787327021122e-05,
141
- "loss": 0.14759160995483397,
142
  "step": 425
143
  },
144
  {
145
  "epoch": 1.4754098360655736,
146
- "grad_norm": 3.7750189304351807,
147
  "learning_rate": 1.5673707210487983e-05,
148
- "loss": 0.14137668609619142,
149
  "step": 450
150
  },
151
  {
152
  "epoch": 1.5573770491803278,
153
- "grad_norm": 1.8209186792373657,
154
  "learning_rate": 1.530954115076475e-05,
155
- "loss": 0.19394855499267577,
156
  "step": 475
157
  },
158
  {
159
  "epoch": 1.639344262295082,
160
- "grad_norm": 0.4824683368206024,
161
  "learning_rate": 1.4945375091041516e-05,
162
- "loss": 0.1700056266784668,
163
  "step": 500
164
  },
165
  {
166
  "epoch": 1.721311475409836,
167
- "grad_norm": 0.5682937502861023,
168
  "learning_rate": 1.4581209031318282e-05,
169
- "loss": 0.17243267059326173,
170
  "step": 525
171
  },
172
  {
173
  "epoch": 1.8032786885245902,
174
- "grad_norm": 2.2086634635925293,
175
  "learning_rate": 1.4217042971595047e-05,
176
- "loss": 0.15430424690246583,
177
  "step": 550
178
  },
179
  {
180
  "epoch": 1.8852459016393444,
181
- "grad_norm": 6.93908166885376,
182
  "learning_rate": 1.3852876911871815e-05,
183
- "loss": 0.10752416610717773,
184
  "step": 575
185
  },
186
  {
187
  "epoch": 1.9672131147540983,
188
- "grad_norm": 5.092395782470703,
189
  "learning_rate": 1.3488710852148582e-05,
190
- "loss": 0.21721889495849608,
191
  "step": 600
192
  },
193
  {
194
  "epoch": 2.0,
195
- "eval_accuracy": 0.9570552147239264,
196
- "eval_f1": 0.7692307692307693,
197
- "eval_loss": 0.12331932783126831,
198
- "eval_precision": 0.6796116504854369,
199
- "eval_recall": 0.8860759493670886,
200
- "eval_roc_auc": 0.9679672209628138,
201
- "eval_runtime": 3.5595,
202
- "eval_samples_per_second": 274.754,
203
- "eval_steps_per_second": 8.709,
204
  "step": 610
205
  }
206
  ],
 
1
  {
2
  "best_global_step": 305,
3
+ "best_metric": 0.7721518987341772,
4
  "best_model_checkpoint": "/content/agri-utilization-classifier/transformer/checkpoint-305",
5
  "epoch": 2.0,
6
  "eval_steps": 500,
 
11
  "log_history": [
12
  {
13
  "epoch": 0.08196721311475409,
14
+ "grad_norm": 6.055062770843506,
15
  "learning_rate": 3.157894736842105e-06,
16
+ "loss": 0.62972900390625,
17
  "step": 25
18
  },
19
  {
20
  "epoch": 0.16393442622950818,
21
+ "grad_norm": 10.6914701461792,
22
  "learning_rate": 6.447368421052632e-06,
23
+ "loss": 0.44850738525390627,
24
  "step": 50
25
  },
26
  {
27
  "epoch": 0.2459016393442623,
28
+ "grad_norm": 6.670228481292725,
29
  "learning_rate": 9.736842105263159e-06,
30
+ "loss": 0.3566379165649414,
31
  "step": 75
32
  },
33
  {
34
  "epoch": 0.32786885245901637,
35
+ "grad_norm": 2.589911937713623,
36
  "learning_rate": 1.3026315789473684e-05,
37
+ "loss": 0.2718839645385742,
38
  "step": 100
39
  },
40
  {
41
  "epoch": 0.4098360655737705,
42
+ "grad_norm": 22.02676773071289,
43
  "learning_rate": 1.6315789473684213e-05,
44
+ "loss": 0.1922766876220703,
45
  "step": 125
46
  },
47
  {
48
  "epoch": 0.4918032786885246,
49
+ "grad_norm": 2.6362855434417725,
50
  "learning_rate": 1.960526315789474e-05,
51
+ "loss": 0.1837622833251953,
52
  "step": 150
53
  },
54
  {
55
  "epoch": 0.5737704918032787,
56
+ "grad_norm": 3.478484630584717,
57
  "learning_rate": 1.9679533867443555e-05,
58
+ "loss": 0.18766048431396484,
59
  "step": 175
60
  },
61
  {
62
  "epoch": 0.6557377049180327,
63
+ "grad_norm": 8.077605247497559,
64
  "learning_rate": 1.9315367807720323e-05,
65
+ "loss": 0.23830581665039063,
66
  "step": 200
67
  },
68
  {
69
  "epoch": 0.7377049180327869,
70
+ "grad_norm": 0.7427046298980713,
71
  "learning_rate": 1.8951201747997088e-05,
72
+ "loss": 0.30742517471313474,
73
  "step": 225
74
  },
75
  {
76
  "epoch": 0.819672131147541,
77
+ "grad_norm": 36.34975051879883,
78
  "learning_rate": 1.8587035688273852e-05,
79
+ "loss": 0.22336017608642578,
80
  "step": 250
81
  },
82
  {
83
  "epoch": 0.9016393442622951,
84
+ "grad_norm": 5.215510845184326,
85
  "learning_rate": 1.822286962855062e-05,
86
+ "loss": 0.13779294967651368,
87
  "step": 275
88
  },
89
  {
90
  "epoch": 0.9836065573770492,
91
+ "grad_norm": 3.551121950149536,
92
  "learning_rate": 1.7858703568827385e-05,
93
+ "loss": 0.19200111389160157,
94
  "step": 300
95
  },
96
  {
97
  "epoch": 1.0,
98
+ "eval_accuracy": 0.9631901840490797,
99
+ "eval_f1": 0.7721518987341772,
100
+ "eval_loss": 0.1292734444141388,
101
+ "eval_precision": 0.7721518987341772,
102
+ "eval_recall": 0.7721518987341772,
103
+ "eval_roc_auc": 0.9563720589684741,
104
+ "eval_runtime": 3.3396,
105
+ "eval_samples_per_second": 292.853,
106
+ "eval_steps_per_second": 9.283,
107
  "step": 305
108
  },
109
  {
110
  "epoch": 1.0655737704918034,
111
+ "grad_norm": 0.5402449369430542,
112
  "learning_rate": 1.7494537509104153e-05,
113
+ "loss": 0.1241053295135498,
114
  "step": 325
115
  },
116
  {
117
  "epoch": 1.1475409836065573,
118
+ "grad_norm": 4.476892948150635,
119
  "learning_rate": 1.7130371449380918e-05,
120
+ "loss": 0.20724605560302733,
121
  "step": 350
122
  },
123
  {
124
  "epoch": 1.2295081967213115,
125
+ "grad_norm": 0.46729782223701477,
126
  "learning_rate": 1.6766205389657686e-05,
127
+ "loss": 0.13567353248596192,
128
  "step": 375
129
  },
130
  {
131
  "epoch": 1.3114754098360657,
132
+ "grad_norm": 0.1852118819952011,
133
  "learning_rate": 1.640203932993445e-05,
134
+ "loss": 0.13295170783996582,
135
  "step": 400
136
  },
137
  {
138
  "epoch": 1.3934426229508197,
139
+ "grad_norm": 1.2681413888931274,
140
  "learning_rate": 1.603787327021122e-05,
141
+ "loss": 0.2027936363220215,
142
  "step": 425
143
  },
144
  {
145
  "epoch": 1.4754098360655736,
146
+ "grad_norm": 7.484091281890869,
147
  "learning_rate": 1.5673707210487983e-05,
148
+ "loss": 0.12364128112792969,
149
  "step": 450
150
  },
151
  {
152
  "epoch": 1.5573770491803278,
153
+ "grad_norm": 0.46489500999450684,
154
  "learning_rate": 1.530954115076475e-05,
155
+ "loss": 0.14407362937927246,
156
  "step": 475
157
  },
158
  {
159
  "epoch": 1.639344262295082,
160
+ "grad_norm": 0.20967872440814972,
161
  "learning_rate": 1.4945375091041516e-05,
162
+ "loss": 0.12458925247192383,
163
  "step": 500
164
  },
165
  {
166
  "epoch": 1.721311475409836,
167
+ "grad_norm": 0.1643747240304947,
168
  "learning_rate": 1.4581209031318282e-05,
169
+ "loss": 0.21631996154785157,
170
  "step": 525
171
  },
172
  {
173
  "epoch": 1.8032786885245902,
174
+ "grad_norm": 7.073329448699951,
175
  "learning_rate": 1.4217042971595047e-05,
176
+ "loss": 0.16043865203857421,
177
  "step": 550
178
  },
179
  {
180
  "epoch": 1.8852459016393444,
181
+ "grad_norm": 1.744958758354187,
182
  "learning_rate": 1.3852876911871815e-05,
183
+ "loss": 0.0966644287109375,
184
  "step": 575
185
  },
186
  {
187
  "epoch": 1.9672131147540983,
188
+ "grad_norm": 12.79035472869873,
189
  "learning_rate": 1.3488710852148582e-05,
190
+ "loss": 0.15884541511535644,
191
  "step": 600
192
  },
193
  {
194
  "epoch": 2.0,
195
+ "eval_accuracy": 0.9611451942740287,
196
+ "eval_f1": 0.7432432432432432,
197
+ "eval_loss": 0.13287827372550964,
198
+ "eval_precision": 0.7971014492753623,
199
+ "eval_recall": 0.6962025316455697,
200
+ "eval_roc_auc": 0.9594697343039381,
201
+ "eval_runtime": 3.2739,
202
+ "eval_samples_per_second": 298.727,
203
+ "eval_steps_per_second": 9.469,
204
  "step": 610
205
  }
206
  ],
transformer/checkpoint-915/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:250827bf49cc2428027e7841d0a3f651c274b7917ed1fbc9c81196269698a974
3
  size 1112205008
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:49a18c813f49f0f53eef5e1646a8e80f88eb366c956b09301312f1a23e9fe977
3
  size 1112205008
transformer/checkpoint-915/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a2fa7d37127b9b7cd1466132b3107ea1dd648d688b50858d287773be798818f6
3
  size 2224532875
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b2d30dd11d7303659faa787633c2b391e942f0078040c8d866d78e57de1a65f7
3
  size 2224532875
transformer/checkpoint-915/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:22f7d99f4415d4a26286d13914edfc607115fbcba314cd2c896416d1ea8f5425
3
  size 1383
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:680cfaac80453c6e2276a5eeef2888cb64cee094a7610d7db58bd53646d2351a
3
  size 1383
transformer/checkpoint-915/trainer_state.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
  "best_global_step": 915,
3
- "best_metric": 0.8143712574850299,
4
  "best_model_checkpoint": "/content/agri-utilization-classifier/transformer/checkpoint-915",
5
  "epoch": 3.0,
6
  "eval_steps": 500,
@@ -11,293 +11,293 @@
11
  "log_history": [
12
  {
13
  "epoch": 0.08196721311475409,
14
- "grad_norm": Infinity,
15
  "learning_rate": 3.157894736842105e-06,
16
- "loss": 0.7012384033203125,
17
  "step": 25
18
  },
19
  {
20
  "epoch": 0.16393442622950818,
21
- "grad_norm": 11.968428611755371,
22
  "learning_rate": 6.447368421052632e-06,
23
- "loss": 0.4254766845703125,
24
  "step": 50
25
  },
26
  {
27
  "epoch": 0.2459016393442623,
28
- "grad_norm": 19.943565368652344,
29
  "learning_rate": 9.736842105263159e-06,
30
- "loss": 0.3554811859130859,
31
  "step": 75
32
  },
33
  {
34
  "epoch": 0.32786885245901637,
35
- "grad_norm": 5.117671489715576,
36
  "learning_rate": 1.3026315789473684e-05,
37
- "loss": 0.3145046615600586,
38
  "step": 100
39
  },
40
  {
41
  "epoch": 0.4098360655737705,
42
- "grad_norm": 11.402164459228516,
43
  "learning_rate": 1.6315789473684213e-05,
44
- "loss": 0.2773847770690918,
45
  "step": 125
46
  },
47
  {
48
  "epoch": 0.4918032786885246,
49
- "grad_norm": 8.077093124389648,
50
  "learning_rate": 1.960526315789474e-05,
51
- "loss": 0.2556156539916992,
52
  "step": 150
53
  },
54
  {
55
  "epoch": 0.5737704918032787,
56
- "grad_norm": 2.0055508613586426,
57
  "learning_rate": 1.9679533867443555e-05,
58
- "loss": 0.25026893615722656,
59
  "step": 175
60
  },
61
  {
62
  "epoch": 0.6557377049180327,
63
- "grad_norm": 0.9293265342712402,
64
  "learning_rate": 1.9315367807720323e-05,
65
- "loss": 0.24691852569580078,
66
  "step": 200
67
  },
68
  {
69
  "epoch": 0.7377049180327869,
70
- "grad_norm": 1.4303064346313477,
71
  "learning_rate": 1.8951201747997088e-05,
72
- "loss": 0.21208112716674804,
73
  "step": 225
74
  },
75
  {
76
  "epoch": 0.819672131147541,
77
- "grad_norm": 4.035928249359131,
78
  "learning_rate": 1.8587035688273852e-05,
79
- "loss": 0.17017290115356445,
80
  "step": 250
81
  },
82
  {
83
  "epoch": 0.9016393442622951,
84
- "grad_norm": 4.241303443908691,
85
  "learning_rate": 1.822286962855062e-05,
86
- "loss": 0.16115386962890624,
87
  "step": 275
88
  },
89
  {
90
  "epoch": 0.9836065573770492,
91
- "grad_norm": 0.3990240693092346,
92
  "learning_rate": 1.7858703568827385e-05,
93
- "loss": 0.18343988418579102,
94
  "step": 300
95
  },
96
  {
97
  "epoch": 1.0,
98
- "eval_accuracy": 0.9642126789366053,
99
- "eval_f1": 0.7741935483870968,
100
- "eval_loss": 0.14546315371990204,
101
- "eval_precision": 0.7894736842105263,
102
- "eval_recall": 0.759493670886076,
103
- "eval_roc_auc": 0.9131946888948339,
104
- "eval_runtime": 3.5921,
105
- "eval_samples_per_second": 272.266,
106
- "eval_steps_per_second": 8.63,
107
  "step": 305
108
  },
109
  {
110
  "epoch": 1.0655737704918034,
111
- "grad_norm": 4.865096569061279,
112
  "learning_rate": 1.7494537509104153e-05,
113
- "loss": 0.19335979461669922,
114
  "step": 325
115
  },
116
  {
117
  "epoch": 1.1475409836065573,
118
- "grad_norm": 1.9780689477920532,
119
  "learning_rate": 1.7130371449380918e-05,
120
- "loss": 0.2344082260131836,
121
  "step": 350
122
  },
123
  {
124
  "epoch": 1.2295081967213115,
125
- "grad_norm": 1.3759413957595825,
126
  "learning_rate": 1.6766205389657686e-05,
127
- "loss": 0.20309404373168946,
128
  "step": 375
129
  },
130
  {
131
  "epoch": 1.3114754098360657,
132
- "grad_norm": 0.30811628699302673,
133
  "learning_rate": 1.640203932993445e-05,
134
- "loss": 0.224365234375,
135
  "step": 400
136
  },
137
  {
138
  "epoch": 1.3934426229508197,
139
- "grad_norm": 5.530014514923096,
140
  "learning_rate": 1.603787327021122e-05,
141
- "loss": 0.14759160995483397,
142
  "step": 425
143
  },
144
  {
145
  "epoch": 1.4754098360655736,
146
- "grad_norm": 3.7750189304351807,
147
  "learning_rate": 1.5673707210487983e-05,
148
- "loss": 0.14137668609619142,
149
  "step": 450
150
  },
151
  {
152
  "epoch": 1.5573770491803278,
153
- "grad_norm": 1.8209186792373657,
154
  "learning_rate": 1.530954115076475e-05,
155
- "loss": 0.19394855499267577,
156
  "step": 475
157
  },
158
  {
159
  "epoch": 1.639344262295082,
160
- "grad_norm": 0.4824683368206024,
161
  "learning_rate": 1.4945375091041516e-05,
162
- "loss": 0.1700056266784668,
163
  "step": 500
164
  },
165
  {
166
  "epoch": 1.721311475409836,
167
- "grad_norm": 0.5682937502861023,
168
  "learning_rate": 1.4581209031318282e-05,
169
- "loss": 0.17243267059326173,
170
  "step": 525
171
  },
172
  {
173
  "epoch": 1.8032786885245902,
174
- "grad_norm": 2.2086634635925293,
175
  "learning_rate": 1.4217042971595047e-05,
176
- "loss": 0.15430424690246583,
177
  "step": 550
178
  },
179
  {
180
  "epoch": 1.8852459016393444,
181
- "grad_norm": 6.93908166885376,
182
  "learning_rate": 1.3852876911871815e-05,
183
- "loss": 0.10752416610717773,
184
  "step": 575
185
  },
186
  {
187
  "epoch": 1.9672131147540983,
188
- "grad_norm": 5.092395782470703,
189
  "learning_rate": 1.3488710852148582e-05,
190
- "loss": 0.21721889495849608,
191
  "step": 600
192
  },
193
  {
194
  "epoch": 2.0,
195
- "eval_accuracy": 0.9570552147239264,
196
- "eval_f1": 0.7692307692307693,
197
- "eval_loss": 0.12331932783126831,
198
- "eval_precision": 0.6796116504854369,
199
- "eval_recall": 0.8860759493670886,
200
- "eval_roc_auc": 0.9679672209628138,
201
- "eval_runtime": 3.5595,
202
- "eval_samples_per_second": 274.754,
203
- "eval_steps_per_second": 8.709,
204
  "step": 610
205
  },
206
  {
207
  "epoch": 2.0491803278688523,
208
- "grad_norm": 12.040640830993652,
209
  "learning_rate": 1.3124544792425346e-05,
210
- "loss": 0.11267939567565918,
211
  "step": 625
212
  },
213
  {
214
  "epoch": 2.1311475409836067,
215
- "grad_norm": 0.33291733264923096,
216
  "learning_rate": 1.2760378732702113e-05,
217
- "loss": 0.16029356002807618,
218
  "step": 650
219
  },
220
  {
221
  "epoch": 2.2131147540983607,
222
- "grad_norm": 0.1562187671661377,
223
  "learning_rate": 1.239621267297888e-05,
224
- "loss": 0.13354766845703125,
225
  "step": 675
226
  },
227
  {
228
  "epoch": 2.2950819672131146,
229
- "grad_norm": 0.39492854475975037,
230
  "learning_rate": 1.2032046613255645e-05,
231
- "loss": 0.08748809814453125,
232
  "step": 700
233
  },
234
  {
235
  "epoch": 2.3770491803278686,
236
- "grad_norm": 0.22857463359832764,
237
  "learning_rate": 1.1667880553532412e-05,
238
- "loss": 0.1255797290802002,
239
  "step": 725
240
  },
241
  {
242
  "epoch": 2.459016393442623,
243
- "grad_norm": 42.23853302001953,
244
  "learning_rate": 1.1303714493809176e-05,
245
- "loss": 0.09398910522460938,
246
  "step": 750
247
  },
248
  {
249
  "epoch": 2.540983606557377,
250
- "grad_norm": 9.628519058227539,
251
  "learning_rate": 1.0939548434085944e-05,
252
- "loss": 0.12067486763000489,
253
  "step": 775
254
  },
255
  {
256
  "epoch": 2.6229508196721314,
257
- "grad_norm": 8.281865119934082,
258
  "learning_rate": 1.057538237436271e-05,
259
- "loss": 0.0960771656036377,
260
  "step": 800
261
  },
262
  {
263
  "epoch": 2.7049180327868854,
264
- "grad_norm": 0.2366073578596115,
265
  "learning_rate": 1.0211216314639475e-05,
266
- "loss": 0.1477354335784912,
267
  "step": 825
268
  },
269
  {
270
  "epoch": 2.7868852459016393,
271
- "grad_norm": 2.127614974975586,
272
  "learning_rate": 9.847050254916243e-06,
273
- "loss": 0.12143749237060547,
274
  "step": 850
275
  },
276
  {
277
  "epoch": 2.8688524590163933,
278
- "grad_norm": 0.1283058375120163,
279
  "learning_rate": 9.482884195193008e-06,
280
- "loss": 0.0978905963897705,
281
  "step": 875
282
  },
283
  {
284
  "epoch": 2.9508196721311473,
285
- "grad_norm": 0.16377978026866913,
286
  "learning_rate": 9.118718135469774e-06,
287
- "loss": 0.13501665115356445,
288
  "step": 900
289
  },
290
  {
291
  "epoch": 3.0,
292
- "eval_accuracy": 0.9683026584867076,
293
- "eval_f1": 0.8143712574850299,
294
- "eval_loss": 0.11399859189987183,
295
- "eval_precision": 0.7727272727272727,
296
- "eval_recall": 0.8607594936708861,
297
- "eval_roc_auc": 0.9685867560299067,
298
- "eval_runtime": 3.583,
299
- "eval_samples_per_second": 272.954,
300
- "eval_steps_per_second": 8.652,
301
  "step": 915
302
  }
303
  ],
 
1
  {
2
  "best_global_step": 915,
3
+ "best_metric": 0.8220858895705522,
4
  "best_model_checkpoint": "/content/agri-utilization-classifier/transformer/checkpoint-915",
5
  "epoch": 3.0,
6
  "eval_steps": 500,
 
11
  "log_history": [
12
  {
13
  "epoch": 0.08196721311475409,
14
+ "grad_norm": 6.055062770843506,
15
  "learning_rate": 3.157894736842105e-06,
16
+ "loss": 0.62972900390625,
17
  "step": 25
18
  },
19
  {
20
  "epoch": 0.16393442622950818,
21
+ "grad_norm": 10.6914701461792,
22
  "learning_rate": 6.447368421052632e-06,
23
+ "loss": 0.44850738525390627,
24
  "step": 50
25
  },
26
  {
27
  "epoch": 0.2459016393442623,
28
+ "grad_norm": 6.670228481292725,
29
  "learning_rate": 9.736842105263159e-06,
30
+ "loss": 0.3566379165649414,
31
  "step": 75
32
  },
33
  {
34
  "epoch": 0.32786885245901637,
35
+ "grad_norm": 2.589911937713623,
36
  "learning_rate": 1.3026315789473684e-05,
37
+ "loss": 0.2718839645385742,
38
  "step": 100
39
  },
40
  {
41
  "epoch": 0.4098360655737705,
42
+ "grad_norm": 22.02676773071289,
43
  "learning_rate": 1.6315789473684213e-05,
44
+ "loss": 0.1922766876220703,
45
  "step": 125
46
  },
47
  {
48
  "epoch": 0.4918032786885246,
49
+ "grad_norm": 2.6362855434417725,
50
  "learning_rate": 1.960526315789474e-05,
51
+ "loss": 0.1837622833251953,
52
  "step": 150
53
  },
54
  {
55
  "epoch": 0.5737704918032787,
56
+ "grad_norm": 3.478484630584717,
57
  "learning_rate": 1.9679533867443555e-05,
58
+ "loss": 0.18766048431396484,
59
  "step": 175
60
  },
61
  {
62
  "epoch": 0.6557377049180327,
63
+ "grad_norm": 8.077605247497559,
64
  "learning_rate": 1.9315367807720323e-05,
65
+ "loss": 0.23830581665039063,
66
  "step": 200
67
  },
68
  {
69
  "epoch": 0.7377049180327869,
70
+ "grad_norm": 0.7427046298980713,
71
  "learning_rate": 1.8951201747997088e-05,
72
+ "loss": 0.30742517471313474,
73
  "step": 225
74
  },
75
  {
76
  "epoch": 0.819672131147541,
77
+ "grad_norm": 36.34975051879883,
78
  "learning_rate": 1.8587035688273852e-05,
79
+ "loss": 0.22336017608642578,
80
  "step": 250
81
  },
82
  {
83
  "epoch": 0.9016393442622951,
84
+ "grad_norm": 5.215510845184326,
85
  "learning_rate": 1.822286962855062e-05,
86
+ "loss": 0.13779294967651368,
87
  "step": 275
88
  },
89
  {
90
  "epoch": 0.9836065573770492,
91
+ "grad_norm": 3.551121950149536,
92
  "learning_rate": 1.7858703568827385e-05,
93
+ "loss": 0.19200111389160157,
94
  "step": 300
95
  },
96
  {
97
  "epoch": 1.0,
98
+ "eval_accuracy": 0.9631901840490797,
99
+ "eval_f1": 0.7721518987341772,
100
+ "eval_loss": 0.1292734444141388,
101
+ "eval_precision": 0.7721518987341772,
102
+ "eval_recall": 0.7721518987341772,
103
+ "eval_roc_auc": 0.9563720589684741,
104
+ "eval_runtime": 3.3396,
105
+ "eval_samples_per_second": 292.853,
106
+ "eval_steps_per_second": 9.283,
107
  "step": 305
108
  },
109
  {
110
  "epoch": 1.0655737704918034,
111
+ "grad_norm": 0.5402449369430542,
112
  "learning_rate": 1.7494537509104153e-05,
113
+ "loss": 0.1241053295135498,
114
  "step": 325
115
  },
116
  {
117
  "epoch": 1.1475409836065573,
118
+ "grad_norm": 4.476892948150635,
119
  "learning_rate": 1.7130371449380918e-05,
120
+ "loss": 0.20724605560302733,
121
  "step": 350
122
  },
123
  {
124
  "epoch": 1.2295081967213115,
125
+ "grad_norm": 0.46729782223701477,
126
  "learning_rate": 1.6766205389657686e-05,
127
+ "loss": 0.13567353248596192,
128
  "step": 375
129
  },
130
  {
131
  "epoch": 1.3114754098360657,
132
+ "grad_norm": 0.1852118819952011,
133
  "learning_rate": 1.640203932993445e-05,
134
+ "loss": 0.13295170783996582,
135
  "step": 400
136
  },
137
  {
138
  "epoch": 1.3934426229508197,
139
+ "grad_norm": 1.2681413888931274,
140
  "learning_rate": 1.603787327021122e-05,
141
+ "loss": 0.2027936363220215,
142
  "step": 425
143
  },
144
  {
145
  "epoch": 1.4754098360655736,
146
+ "grad_norm": 7.484091281890869,
147
  "learning_rate": 1.5673707210487983e-05,
148
+ "loss": 0.12364128112792969,
149
  "step": 450
150
  },
151
  {
152
  "epoch": 1.5573770491803278,
153
+ "grad_norm": 0.46489500999450684,
154
  "learning_rate": 1.530954115076475e-05,
155
+ "loss": 0.14407362937927246,
156
  "step": 475
157
  },
158
  {
159
  "epoch": 1.639344262295082,
160
+ "grad_norm": 0.20967872440814972,
161
  "learning_rate": 1.4945375091041516e-05,
162
+ "loss": 0.12458925247192383,
163
  "step": 500
164
  },
165
  {
166
  "epoch": 1.721311475409836,
167
+ "grad_norm": 0.1643747240304947,
168
  "learning_rate": 1.4581209031318282e-05,
169
+ "loss": 0.21631996154785157,
170
  "step": 525
171
  },
172
  {
173
  "epoch": 1.8032786885245902,
174
+ "grad_norm": 7.073329448699951,
175
  "learning_rate": 1.4217042971595047e-05,
176
+ "loss": 0.16043865203857421,
177
  "step": 550
178
  },
179
  {
180
  "epoch": 1.8852459016393444,
181
+ "grad_norm": 1.744958758354187,
182
  "learning_rate": 1.3852876911871815e-05,
183
+ "loss": 0.0966644287109375,
184
  "step": 575
185
  },
186
  {
187
  "epoch": 1.9672131147540983,
188
+ "grad_norm": 12.79035472869873,
189
  "learning_rate": 1.3488710852148582e-05,
190
+ "loss": 0.15884541511535644,
191
  "step": 600
192
  },
193
  {
194
  "epoch": 2.0,
195
+ "eval_accuracy": 0.9611451942740287,
196
+ "eval_f1": 0.7432432432432432,
197
+ "eval_loss": 0.13287827372550964,
198
+ "eval_precision": 0.7971014492753623,
199
+ "eval_recall": 0.6962025316455697,
200
+ "eval_roc_auc": 0.9594697343039381,
201
+ "eval_runtime": 3.2739,
202
+ "eval_samples_per_second": 298.727,
203
+ "eval_steps_per_second": 9.469,
204
  "step": 610
205
  },
206
  {
207
  "epoch": 2.0491803278688523,
208
+ "grad_norm": 17.520444869995117,
209
  "learning_rate": 1.3124544792425346e-05,
210
+ "loss": 0.08896012306213379,
211
  "step": 625
212
  },
213
  {
214
  "epoch": 2.1311475409836067,
215
+ "grad_norm": 0.16623224318027496,
216
  "learning_rate": 1.2760378732702113e-05,
217
+ "loss": 0.11752216339111328,
218
  "step": 650
219
  },
220
  {
221
  "epoch": 2.2131147540983607,
222
+ "grad_norm": 0.20762814581394196,
223
  "learning_rate": 1.239621267297888e-05,
224
+ "loss": 0.1193038272857666,
225
  "step": 675
226
  },
227
  {
228
  "epoch": 2.2950819672131146,
229
+ "grad_norm": 0.1500111073255539,
230
  "learning_rate": 1.2032046613255645e-05,
231
+ "loss": 0.0630855655670166,
232
  "step": 700
233
  },
234
  {
235
  "epoch": 2.3770491803278686,
236
+ "grad_norm": 0.17727839946746826,
237
  "learning_rate": 1.1667880553532412e-05,
238
+ "loss": 0.08730959892272949,
239
  "step": 725
240
  },
241
  {
242
  "epoch": 2.459016393442623,
243
+ "grad_norm": 4.3997321128845215,
244
  "learning_rate": 1.1303714493809176e-05,
245
+ "loss": 0.12114215850830078,
246
  "step": 750
247
  },
248
  {
249
  "epoch": 2.540983606557377,
250
+ "grad_norm": 34.47224044799805,
251
  "learning_rate": 1.0939548434085944e-05,
252
+ "loss": 0.11070786476135254,
253
  "step": 775
254
  },
255
  {
256
  "epoch": 2.6229508196721314,
257
+ "grad_norm": 25.977081298828125,
258
  "learning_rate": 1.057538237436271e-05,
259
+ "loss": 0.10845686912536621,
260
  "step": 800
261
  },
262
  {
263
  "epoch": 2.7049180327868854,
264
+ "grad_norm": 0.1657736450433731,
265
  "learning_rate": 1.0211216314639475e-05,
266
+ "loss": 0.1025285530090332,
267
  "step": 825
268
  },
269
  {
270
  "epoch": 2.7868852459016393,
271
+ "grad_norm": 34.05498504638672,
272
  "learning_rate": 9.847050254916243e-06,
273
+ "loss": 0.07825160026550293,
274
  "step": 850
275
  },
276
  {
277
  "epoch": 2.8688524590163933,
278
+ "grad_norm": 0.2868161201477051,
279
  "learning_rate": 9.482884195193008e-06,
280
+ "loss": 0.12041816711425782,
281
  "step": 875
282
  },
283
  {
284
  "epoch": 2.9508196721311473,
285
+ "grad_norm": 0.19192977249622345,
286
  "learning_rate": 9.118718135469774e-06,
287
+ "loss": 0.08709416389465333,
288
  "step": 900
289
  },
290
  {
291
  "epoch": 3.0,
292
+ "eval_accuracy": 0.9703476482617587,
293
+ "eval_f1": 0.8220858895705522,
294
+ "eval_loss": 0.11163181066513062,
295
+ "eval_precision": 0.7976190476190477,
296
+ "eval_recall": 0.8481012658227848,
297
+ "eval_roc_auc": 0.9661086157615353,
298
+ "eval_runtime": 3.1733,
299
+ "eval_samples_per_second": 308.193,
300
+ "eval_steps_per_second": 9.769,
301
  "step": 915
302
  }
303
  ],
transformer/config.json CHANGED
@@ -31,16 +31,16 @@
31
  "pad_token_id": 1,
32
  "position_embedding_type": "absolute",
33
  "problem_type": "single_label_classification",
34
- "threshold": 0.4999122619628906,
35
  "tie_word_embeddings": true,
36
  "transformers_version": "5.9.0",
37
  "type_vocab_size": 1,
38
  "use_cache": false,
39
  "validation_threshold_report": {
40
- "f1": 0.8484848484848485,
41
- "precision": 0.813953488372093,
42
- "recall": 0.8860759493670886,
43
- "threshold": 0.4999122619628906
44
  },
45
  "vocab_size": 250002
46
  }
 
31
  "pad_token_id": 1,
32
  "position_embedding_type": "absolute",
33
  "problem_type": "single_label_classification",
34
+ "threshold": 0.4710787534713745,
35
  "tie_word_embeddings": true,
36
  "transformers_version": "5.9.0",
37
  "type_vocab_size": 1,
38
  "use_cache": false,
39
  "validation_threshold_report": {
40
+ "f1": 0.829268292682927,
41
+ "precision": 0.8,
42
+ "recall": 0.8607594936708861,
43
+ "threshold": 0.4710787534713745
44
  },
45
  "vocab_size": 250002
46
  }
transformer/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:572ab7a5b2bc6140bc72ff08cc111f90496ceab40c64330fc6373973a0b6830c
3
  size 1112205008
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:49a18c813f49f0f53eef5e1646a8e80f88eb366c956b09301312f1a23e9fe977
3
  size 1112205008
transformer/test_predictions.csv CHANGED
The diff for this file is too large to render. See raw diff
 
transformer/validation_predictions.csv CHANGED
The diff for this file is too large to render. See raw diff