clapAI
/

mmBERT-small-multilingual-sentiment

+---
+library_name: transformers
+license: mit
+base_model: jhu-clsp/mmBERT-small
+tags:
+- generated_from_trainer
+metrics:
+- f1
+- precision
+- recall
+model-index:
+- name: mmBERT-small-multilingual-sentiment
+  results: []
+---
+<!-- This model card has been generated automatically according to the information the Trainer had access to. You
+should probably proofread and complete it, then remove this comment. -->
+# mmBERT-small-multilingual-sentiment
+This model is a fine-tuned version of [jhu-clsp/mmBERT-small](https://huggingface.co/jhu-clsp/mmBERT-small) on an unknown dataset.
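+It reports the following results on the evaluation set (note: the NaN loss and all-zero metrics suggest that training diverged, so these numbers should not be read as the performance of a usable classifier):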
+- Loss: nan
+- F1: 0.0
+- Precision: 0.0
+- Recall: 0.0
+## Model description
+More information needed
+## Intended uses & limitations
+More information needed
+## Training and evaluation data
+More information needed
+## Training procedure
+### Training hyperparameters
+The following hyperparameters were used during training:
+- learning_rate: 5e-05
+- train_batch_size: 64
+- eval_batch_size: 64
+- seed: 0
+- distributed_type: multi-GPU
+- num_devices: 2
+- gradient_accumulation_steps: 8
+- total_train_batch_size: 1024
+- total_eval_batch_size: 128
+- optimizer: OptimizerNames.ADAMW_TORCH_FUSED with betas=(0.9,0.999) and epsilon=1e-08 (no additional optimizer arguments)
+- lr_scheduler_type: cosine
+- lr_scheduler_warmup_ratio: 0.01
+- num_epochs: 5
+### Training results
+| Training Loss | Epoch | Step | Validation Loss | F1  | Precision | Recall |
+|:-------------:|:-----:|:----:|:---------------:|:---:|:---------:|:------:|
+| 3407638.4     | 1.0   | 10   | nan             | 0.0 | 0.0       | 0.0    |
+| 0.0           | 2.0   | 20   | nan             | 0.0 | 0.0       | 0.0    |
+| 0.0           | 3.0   | 30   | nan             | 0.0 | 0.0       | 0.0    |
+| 0.0           | 4.0   | 40   | nan             | 0.0 | 0.0       | 0.0    |
+| 0.0           | 5.0   | 50   | nan             | 0.0 | 0.0       | 0.0    |
+### Framework versions
+- Transformers 4.55.0
+- Pytorch 2.8.0+cu128
+- Datasets 3.6.0
+- Tokenizers 0.21.4

model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:97b773ba16c37193c62c683944b7563f46760aace9ceb7c937b4ad40633c0073
 size 281299686

 version https://git-lfs.github.com/spec/v1
+oid sha256:52c37d8bd5cefa5fd5c9765c7d94df00795f0b564d743232bbccbb4ec50c95bb
 size 281299686

trainer_state.json ADDED Viewed

	@@ -0,0 +1,133 @@

+{
+  "best_global_step": 10,
+  "best_metric": 0.0,
+  "best_model_checkpoint": "./finetuning-checkpoints/checkpoint-10",
+  "epoch": 5.0,
+  "eval_steps": 500,
+  "global_step": 50,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 1.0,
+      "grad_norm": 1.4142135381698608,
+      "learning_rate": 4.678296760308474e-05,
+      "loss": 3407638.4,
+      "step": 10
+    },
+    {
+      "epoch": 1.0,
+      "eval_f1": 0.0,
+      "eval_loss": NaN,
+      "eval_precision": 0.0,
+      "eval_recall": 0.0,
+      "eval_runtime": 3.267,
+      "eval_samples_per_second": 1530.438,
+      "eval_steps_per_second": 12.244,
+      "step": 10
+    },
+    {
+      "epoch": 2.0,
+      "grad_norm": 1.4142135381698608,
+      "learning_rate": 3.5119583578059846e-05,
+      "loss": 0.0,
+      "step": 20
+    },
+    {
+      "epoch": 2.0,
+      "eval_f1": 0.0,
+      "eval_loss": NaN,
+      "eval_precision": 0.0,
+      "eval_recall": 0.0,
+      "eval_runtime": 1.7505,
+      "eval_samples_per_second": 2856.327,
+      "eval_steps_per_second": 22.851,
+      "step": 20
+    },
+    {
+      "epoch": 3.0,
+      "grad_norm": 1.4142135381698608,
+      "learning_rate": 1.9436976651092144e-05,
+      "loss": 0.0,
+      "step": 30
+    },
+    {
+      "epoch": 3.0,
+      "eval_f1": 0.0,
+      "eval_loss": NaN,
+      "eval_precision": 0.0,
+      "eval_recall": 0.0,
+      "eval_runtime": 1.7509,
+      "eval_samples_per_second": 2855.719,
+      "eval_steps_per_second": 22.846,
+      "step": 30
+    },
+    {
+      "epoch": 4.0,
+      "grad_norm": 1.4142135381698608,
+      "learning_rate": 5.9638510407716394e-06,
+      "loss": 0.0,
+      "step": 40
+    },
+    {
+      "epoch": 4.0,
+      "eval_f1": 0.0,
+      "eval_loss": NaN,
+      "eval_precision": 0.0,
+      "eval_recall": 0.0,
+      "eval_runtime": 1.7512,
+      "eval_samples_per_second": 2855.169,
+      "eval_steps_per_second": 22.841,
+      "step": 40
+    },
+    {
+      "epoch": 5.0,
+      "grad_norm": 1.4142135381698608,
+      "learning_rate": 5.136518124159162e-08,
+      "loss": 0.0,
+      "step": 50
+    },
+    {
+      "epoch": 5.0,
+      "eval_f1": 0.0,
+      "eval_loss": NaN,
+      "eval_precision": 0.0,
+      "eval_recall": 0.0,
+      "eval_runtime": 1.7483,
+      "eval_samples_per_second": 2859.862,
+      "eval_steps_per_second": 22.879,
+      "step": 50
+    },
+    {
+      "epoch": 5.0,
+      "step": 50,
+      "total_flos": 1.315211864702976e+16,
+      "train_loss": 681527.68,
+      "train_runtime": 107.016,
+      "train_samples_per_second": 467.22,
+      "train_steps_per_second": 0.467
+    }
+  ],
+  "logging_steps": 10,
+  "max_steps": 50,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 5,
+  "save_steps": 500,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": true
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 1.315211864702976e+16,
+  "train_batch_size": 64,
+  "trial_name": null,
+  "trial_params": null
+}