Bukareszt committed on Oct 20, 2025

Commit

d62a59f

verified ·

1 Parent(s): a631824

Initial push

Browse files

Files changed (26) hide show

.gitattributes +1 -0
README.md +58 -179
all_results.json +16 -0
classification_report.txt +156 -0
config.json +60 -68
configuration_roberta.py +151 -0
confusion_matrix.png +3 -0
label_info.json +71 -0
logs/events.out.tfevents.1760994777.a5b7e37e7852.6366.0 +3 -0
logs/events.out.tfevents.1760994974.a5b7e37e7852.6366.1 +3 -0
logs/events.out.tfevents.1760995186.a5b7e37e7852.6366.2 +3 -0
logs/events.out.tfevents.1760995679.a5b7e37e7852.6366.3 +3 -0
logs/events.out.tfevents.1760995793.a5b7e37e7852.20606.0 +3 -0
logs/events.out.tfevents.1760995936.a5b7e37e7852.21329.0 +3 -0
logs/events.out.tfevents.1760996283.a5b7e37e7852.21329.1 +3 -0
logs/events.out.tfevents.1760996482.a5b7e37e7852.21329.2 +3 -0
logs/events.out.tfevents.1760996930.a5b7e37e7852.21329.3 +3 -0
logs/events.out.tfevents.1761000162.a5b7e37e7852.21329.4 +3 -0
logs/events.out.tfevents.1761000278.a5b7e37e7852.39741.0 +3 -0
logs/events.out.tfevents.1761000725.a5b7e37e7852.39741.1 +3 -0
model.safetensors +2 -2
test_results.json +11 -0
tokenizer.json +1 -6
tokenizer_config.json +0 -4
train_results.json +8 -0
training_args.bin +3 -0

.gitattributes CHANGED Viewed

@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text

 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+confusion_matrix.png filter=lfs diff=lfs merge=lfs -text

README.md CHANGED Viewed

@@ -1,199 +1,78 @@
 ---
 library_name: transformers
-tags: []
 ---
-# Model Card for Model ID
-<!-- Provide a quick summary of what the model is/does. -->
-## Model Details
-### Model Description
-<!-- Provide a longer summary of what this model is. -->
-This is the model card of a 🤗 transformers model that has been pushed on the Hub. This model card has been automatically generated.
-- **Developed by:** [More Information Needed]
-- **Funded by [optional]:** [More Information Needed]
-- **Shared by [optional]:** [More Information Needed]
-- **Model type:** [More Information Needed]
-- **Language(s) (NLP):** [More Information Needed]
-- **License:** [More Information Needed]
-- **Finetuned from model [optional]:** [More Information Needed]
-### Model Sources [optional]
-<!-- Provide the basic links for the model. -->
-- **Repository:** [More Information Needed]
-- **Paper [optional]:** [More Information Needed]
-- **Demo [optional]:** [More Information Needed]
-## Uses
-<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
-### Direct Use
-<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
-[More Information Needed]
-### Downstream Use [optional]
-<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
-[More Information Needed]
-### Out-of-Scope Use
-<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
-[More Information Needed]
-## Bias, Risks, and Limitations
-<!-- This section is meant to convey both technical and sociotechnical limitations. -->
-[More Information Needed]
-### Recommendations
-<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
-Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
-## How to Get Started with the Model
-Use the code below to get started with the model.
-[More Information Needed]
-## Training Details
-### Training Data
-<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
-[More Information Needed]
-### Training Procedure
-<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
-#### Preprocessing [optional]
-[More Information Needed]
-#### Training Hyperparameters
-- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
-#### Speeds, Sizes, Times [optional]
-<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
-[More Information Needed]
-## Evaluation
-<!-- This section describes the evaluation protocols and provides the results. -->
-### Testing Data, Factors & Metrics
-#### Testing Data
-<!-- This should link to a Dataset Card if possible. -->
-[More Information Needed]
-#### Factors
-<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
-[More Information Needed]
-#### Metrics
-<!-- These are the evaluation metrics being used, ideally with a description of why. -->
-[More Information Needed]
-### Results
-[More Information Needed]
-#### Summary
-## Model Examination [optional]
-<!-- Relevant interpretability work for the model goes here -->
-[More Information Needed]
-## Environmental Impact
-<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
-Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
-- **Hardware Type:** [More Information Needed]
-- **Hours used:** [More Information Needed]
-- **Cloud Provider:** [More Information Needed]
-- **Compute Region:** [More Information Needed]
-- **Carbon Emitted:** [More Information Needed]
-## Technical Specifications [optional]
-### Model Architecture and Objective
-[More Information Needed]
-### Compute Infrastructure
-[More Information Needed]
-#### Hardware
-[More Information Needed]
-#### Software
-[More Information Needed]
-## Citation [optional]
-<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
-**BibTeX:**
-[More Information Needed]
-**APA:**
-[More Information Needed]
-## Glossary [optional]
-<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
-[More Information Needed]
-## More Information [optional]
-[More Information Needed]
-## Model Card Authors [optional]
-[More Information Needed]
-## Model Card Contact
-[More Information Needed]

 ---
 library_name: transformers
+license: apache-2.0
+base_model: PKOBP/polish-roberta-8k
+tags:
+- generated_from_trainer
+metrics:
+- accuracy
+- precision
+- recall
+- f1
+model-index:
+- name: mwik-classifier-xd
+  results: []
 ---
+<!-- This model card has been generated automatically according to the information the Trainer had access to. You
+should probably proofread and complete it, then remove this comment. -->
+# mwik-classifier-xd
+This model is a fine-tuned version of [PKOBP/polish-roberta-8k](https://huggingface.co/PKOBP/polish-roberta-8k) on an unspecified dataset (no dataset name was recorded by the Trainer).
+It achieves the following results on the evaluation set:
+- Loss: 1.1007
+- Accuracy: 0.7838
+- Precision: 0.7630
+- Recall: 0.7838
+- F1: 0.7601
+## Model description
+More information needed
+## Intended uses & limitations
+More information needed
+## Training and evaluation data
+More information needed
+## Training procedure
+### Training hyperparameters
+The following hyperparameters were used during training:
+- learning_rate: 1e-05
+- train_batch_size: 24
+- eval_batch_size: 48
+- seed: 42
+- gradient_accumulation_steps: 4
+- total_train_batch_size: 96
+- optimizer: OptimizerNames.ADAMW_TORCH with betas=(0.9,0.999) and epsilon=1e-08 (no additional optimizer arguments)
+- lr_scheduler_type: polynomial
+- lr_scheduler_warmup_ratio: 0.06
+- num_epochs: 8
+- mixed_precision_training: Native AMP
+### Training results
+| Training Loss | Epoch | Step | Validation Loss | Accuracy | Precision | Recall | F1     |
+|:-------------:|:-----:|:----:|:---------------:|:--------:|:---------:|:------:|:------:|
+| No log        | 1.0   | 40   | 2.7021          | 0.3856   | 0.2986    | 0.3856 | 0.2870 |
+| 3.0953        | 2.0   | 80   | 1.9267          | 0.5958   | 0.5010    | 0.5958 | 0.5128 |
+| 2.1153        | 3.0   | 120  | 1.5299          | 0.6978   | 0.6806    | 0.6978 | 0.6399 |
+| 1.5767        | 4.0   | 160  | 1.3317          | 0.7376   | 0.7340    | 0.7376 | 0.7022 |
+| 1.2985        | 5.0   | 200  | 1.2154          | 0.7674   | 0.7460    | 0.7674 | 0.7407 |
+| 1.2985        | 6.0   | 240  | 1.1614          | 0.7749   | 0.7545    | 0.7749 | 0.7515 |
+| 1.1262        | 7.0   | 280  | 1.1227          | 0.7799   | 0.7575    | 0.7799 | 0.7605 |
+| 1.0373        | 8.0   | 320  | 1.1079          | 0.7786   | 0.7540    | 0.7786 | 0.7588 |
+### Framework versions
+- Transformers 4.57.1
+- Pytorch 2.8.0+cu126
+- Datasets 4.0.0
+- Tokenizers 0.22.1

all_results.json ADDED Viewed

	@@ -0,0 +1,16 @@

+{
+    "epoch": 8.0,
+    "eval_accuracy": 0.7838150289017342,
+    "eval_f1": 0.760134834840537,
+    "eval_loss": 1.1006990671157837,
+    "eval_precision": 0.7630015333272686,
+    "eval_recall": 0.7838150289017342,
+    "eval_runtime": 3.2334,
+    "eval_samples_per_second": 267.523,
+    "eval_steps_per_second": 5.876,
+    "total_flos": 2.6699656498043904e+16,
+    "train_loss": 1.6651537001132966,
+    "train_runtime": 443.5681,
+    "train_samples_per_second": 69.257,
+    "train_steps_per_second": 0.721
+}

classification_report.txt ADDED Viewed

	@@ -0,0 +1,156 @@

+================================================================================
+DETAILED CLASSIFICATION REPORT (Top-1)
+================================================================================
+              precision    recall  f1-score   support
+DIERZ_ST_HYD     1.0000    0.6364    0.7778        11
+     INFO_DW     0.0000    0.0000    0.0000         7
+NEGOC_DESZCZ     0.9091    0.9091    0.9091        11
+  OP_SIEC_WK     0.5385    1.0000    0.7000        14
+       OP_UM     1.0000    0.8571    0.9231         7
+POZ_SPR_WIND     0.6538    0.9444    0.7727        18
+        PRZE     1.0000    1.0000    1.0000         8
+  REKLAMACJA     0.8667    1.0000    0.9286        13
+  UM_PARTYCY     1.0000    0.1667    0.2857         6
+WOD_OGR_PRZY     1.0000    0.8571    0.9231         7
+     ZG_ODCZ     0.8000    0.8571    0.8276        14
+     ZW_NADP     0.0000    0.0000    0.0000         5
+    accuracy                         0.7769       121
+   macro avg     0.7307    0.6857    0.6706       121
+weighted avg     0.7502    0.7769    0.7319       121
+================================================================================
+DETAILED CLASSIFICATION REPORT (Top-1)
+================================================================================
+              precision    recall  f1-score   support
+       BINFO     0.6957    0.9412    0.8000        17
+   DANE_ARCH     1.0000    0.1538    0.2667        13
+DIERZ_ST_HYD     0.8056    0.8529    0.8286        34
+     INFO_DW     0.9333    0.6087    0.7368        23
+        INSP     0.9286    0.6500    0.7647        20
+ INTERW_AW_K     0.6000    0.9273    0.7286        55
+ INTERW_AW_W     0.6133    0.8070    0.6970        57
+ INTERW_ODTW     0.6000    0.8889    0.7164        27
+  INTERW_ZAP     0.8667    0.8667    0.8667        15
+NEGOC_DESZCZ     0.7381    0.9394    0.8267        33
+    ODWOD_KS     1.0000    0.8333    0.9091         6
+  OP_PRZY_WK     0.0000    0.0000    0.0000        16
+  OP_SIEC_WK     0.5366    0.5116    0.5238        43
+       OP_UM     0.7000    0.9545    0.8077        22
+     POZYTYW     0.0000    0.0000    0.0000        17
+POZ_SPR_WIND     0.7746    0.9016    0.8333        61
+        PRZE     0.7541    0.9020    0.8214        51
+         PYT     1.0000    0.2258    0.3684        31
+  REKLAMACJA     0.7798    0.8947    0.8333        95
+    ROW_EKSP     0.8750    0.5385    0.6667        13
+          SK     0.5882    0.5556    0.5714        36
+    UDOST_WN     0.9091    0.8333    0.8696        12
+          UM     0.0000    0.0000    0.0000        14
+  UM_PARTYCY     0.8750    1.0000    0.9333        21
+  UZN_SCIEKI     0.0000    0.0000    0.0000         7
+  UZ_SIEC_WK     1.0000    0.2308    0.3750        13
+WAR_PRZY_SIE     0.0000    0.0000    0.0000         7
+      WAR_WK     0.4000    0.6000    0.4800        10
+    WAR_WKKD     0.5556    0.4167    0.4762        12
+WOD_OGR_PRZY     0.9091    0.9524    0.9302        21
+   WPIN_SIEC     1.0000    0.8182    0.9000        11
+ WYM_PRZY_WK     0.0000    0.0000    0.0000         7
+   ZASW_KONC     0.8000    0.6667    0.7273        12
+     ZG_ODCZ     0.8511    0.9091    0.8791        44
+          ZM     0.7000    0.9655    0.8116        29
+     ZW_NADP     0.7059    0.8000    0.7500        15
+    accuracy                         0.7272       920
+   macro avg     0.6526    0.6152    0.6028       920
+weighted avg     0.6984    0.7272    0.6872       920
+================================================================================
+DETAILED CLASSIFICATION REPORT (Top-1)
+================================================================================
+              precision    recall  f1-score   support
+       BINFO     0.7368    0.8235    0.7778        17
+   DANE_ARCH     0.8571    0.4615    0.6000        13
+DIERZ_ST_HYD     0.8889    0.9412    0.9143        34
+     INFO_DW     0.7826    0.7826    0.7826        23
+        INSP     1.0000    1.0000    1.0000        13
+ INTERW_AW_K     0.6184    0.8545    0.7176        55
+ INTERW_AW_W     0.7101    0.8596    0.7778        57
+ INTERW_ODTW     0.7143    0.9259    0.8065        27
+  INTERW_ZAP     0.8125    0.8667    0.8387        15
+NEGOC_DESZCZ     0.9118    0.9394    0.9254        33
+    ODWOD_KS     0.8000    0.6667    0.7273         6
+  OP_PRZY_WK     0.0000    0.0000    0.0000        16
+  OP_SIEC_WK     0.6765    0.5349    0.5974        43
+       OP_UM     0.7692    0.9091    0.8333        22
+     POZYTYW     0.0000    0.0000    0.0000        17
+POZ_SPR_WIND     0.9032    0.9180    0.9106        61
+        PRZE     0.7538    0.9608    0.8448        51
+         PYT     0.4286    0.2903    0.3462        31
+  REKLAMACJA     0.8469    0.9326    0.8877        89
+    ROW_EKSP     0.8182    0.6923    0.7500        13
+          SK     0.5517    0.5517    0.5517        29
+    UDOST_WN     0.9091    0.8333    0.8696        12
+  UM_PARTYCY     0.9130    1.0000    0.9545        21
+  UZ_SIEC_WK     0.8889    0.6154    0.7273        13
+      WAR_WK     0.5000    0.4000    0.4444        10
+    WAR_WKKD     0.5556    0.4167    0.4762        12
+WOD_OGR_PRZY     0.9091    0.9524    0.9302        21
+   WPIN_SIEC     0.8750    0.6364    0.7368        11
+   ZASW_KONC     0.8889    0.6667    0.7619        12
+     ZG_ODCZ     0.9535    0.9318    0.9425        44
+          ZM     0.8667    0.8966    0.8814        29
+     ZW_NADP     0.9286    0.8667    0.8966        15
+    accuracy                         0.7861       865
+   macro avg     0.7428    0.7227    0.7253       865
+weighted avg     0.7564    0.7861    0.7649       865
+================================================================================
+DETAILED CLASSIFICATION REPORT (Top-1)
+================================================================================
+              precision    recall  f1-score   support
+       BINFO     0.8421    0.9412    0.8889        17
+   DANE_ARCH     1.0000    0.3846    0.5556        13
+DIERZ_ST_HYD     0.9143    0.9412    0.9275        34
+     INFO_DW     0.8095    0.7391    0.7727        23
+        INSP     1.0000    1.0000    1.0000        13
+ INTERW_AW_K     0.6184    0.8545    0.7176        55
+ INTERW_AW_W     0.6618    0.7895    0.7200        57
+ INTERW_ODTW     0.5952    0.9259    0.7246        27
+  INTERW_ZAP     0.8333    0.6667    0.7407        15
+NEGOC_DESZCZ     0.8857    0.9394    0.9118        33
+    ODWOD_KS     1.0000    0.8333    0.9091         6
+  OP_PRZY_WK     0.0000    0.0000    0.0000        16
+  OP_SIEC_WK     0.6667    0.5116    0.5789        43
+       OP_UM     0.8077    0.9545    0.8750        22
+     POZYTYW     0.0000    0.0000    0.0000        17
+POZ_SPR_WIND     0.9048    0.9344    0.9194        61
+        PRZE     0.7656    0.9608    0.8522        51
+         PYT     0.4000    0.1935    0.2609        31
+  REKLAMACJA     0.8400    0.9438    0.8889        89
+    ROW_EKSP     0.6875    0.8462    0.7586        13
+          SK     0.5600    0.4828    0.5185        29
+    UDOST_WN     1.0000    0.8333    0.9091        12
+  UM_PARTYCY     0.8750    1.0000    0.9333        21
+  UZ_SIEC_WK     1.0000    0.6154    0.7619        13
+      WAR_WK     0.3889    0.7000    0.5000        10
+    WAR_WKKD     1.0000    0.2500    0.4000        12
+WOD_OGR_PRZY     0.9500    0.9048    0.9268        21
+   WPIN_SIEC     0.9091    0.9091    0.9091        11
+   ZASW_KONC     1.0000    0.6667    0.8000        12
+     ZG_ODCZ     0.8913    0.9318    0.9111        44
+          ZM     0.9310    0.9310    0.9310        29
+     ZW_NADP     0.9333    0.9333    0.9333        15
+    accuracy                         0.7838       865
+   macro avg     0.7710    0.7350    0.7324       865
+weighted avg     0.7630    0.7838    0.7601       865

config.json CHANGED Viewed

@@ -23,80 +23,72 @@
   "id2label": {
     "0": "BINFO",
     "1": "DANE_ARCH",
-    "2": "INFO_DW",
-    "3": "INSP",
-    "4": "INTERW_AW_K",
-    "5": "INTERW_AW_W",
-    "6": "INTERW_ODTW",
-    "7": "INTERW_ZAP",
-    "8": "NEGOC_DESZCZ",
-    "9": "ODWOD_KS",
-    "10": "OKR_WŁ_PRZEW",
-    "11": "OP_SIEC_WK",
-    "12": "OP_UM",
-    "13": "POZYTYW",
-    "14": "POZ_SPR_WIND",
-    "15": "PRZE",
-    "16": "PRZEK_SIEĆ",
-    "17": "PRZEN_WOD",
-    "18": "PYT",
-    "19": "REKLAMACJA",
-    "20": "ROW_EKSP",
-    "21": "SK",
-    "22": "UDOST_WN",
-    "23": "UM",
-    "24": "UM_PARTYCY",
-    "25": "UZN_ŚCIEKI",
-    "26": "UZ_SIEĆ_WK",
-    "27": "WAR_PRZY_SIE",
-    "28": "WAR_W+K",
-    "29": "WAR_W+K+KD",
-    "30": "WOD_OGR_PRZY",
-    "31": "WYM_PRZYŁ_WK",
-    "32": "ZAŚW_KOŃC",
-    "33": "ZGŁ_ODCZ",
-    "34": "ZM",
-    "35": "ZW_NADPŁ"
   },
   "initializer_range": 0.02,
   "intermediate_size": 4096,
   "label2id": {
     "BINFO": 0,
     "DANE_ARCH": 1,
-    "INFO_DW": 2,
-    "INSP": 3,
-    "INTERW_AW_K": 4,
-    "INTERW_AW_W": 5,
-    "INTERW_ODTW": 6,
-    "INTERW_ZAP": 7,
-    "NEGOC_DESZCZ": 8,
-    "ODWOD_KS": 9,
-    "OKR_WŁ_PRZEW": 10,
-    "OP_SIEC_WK": 11,
-    "OP_UM": 12,
-    "POZYTYW": 13,
-    "POZ_SPR_WIND": 14,
-    "PRZE": 15,
-    "PRZEK_SIEĆ": 16,
-    "PRZEN_WOD": 17,
-    "PYT": 18,
-    "REKLAMACJA": 19,
-    "ROW_EKSP": 20,
-    "SK": 21,
-    "UDOST_WN": 22,
-    "UM": 23,
-    "UM_PARTYCY": 24,
-    "UZN_ŚCIEKI": 25,
-    "UZ_SIEĆ_WK": 26,
-    "WAR_PRZY_SIE": 27,
-    "WAR_W+K": 28,
-    "WAR_W+K+KD": 29,
-    "WOD_OGR_PRZY": 30,
-    "WYM_PRZYŁ_WK": 31,
-    "ZAŚW_KOŃC": 32,
-    "ZGŁ_ODCZ": 33,
-    "ZM": 34,
-    "ZW_NADPŁ": 35
   },
   "layer_norm_eps": 1e-05,
   "max_position_embeddings": 8194,

   "id2label": {
     "0": "BINFO",
     "1": "DANE_ARCH",
+    "2": "DIERZ_ST_HYD",
+    "3": "INFO_DW",
+    "4": "INSP",
+    "5": "INTERW_AW_K",
+    "6": "INTERW_AW_W",
+    "7": "INTERW_ODTW",
+    "8": "INTERW_ZAP",
+    "9": "NEGOC_DESZCZ",
+    "10": "ODWOD_KS",
+    "11": "OP_PRZY_WK",
+    "12": "OP_SIEC_WK",
+    "13": "OP_UM",
+    "14": "POZYTYW",
+    "15": "POZ_SPR_WIND",
+    "16": "PRZE",
+    "17": "PYT",
+    "18": "REKLAMACJA",
+    "19": "ROW_EKSP",
+    "20": "SK",
+    "21": "UDOST_WN",
+    "22": "UM_PARTYCY",
+    "23": "UZ_SIEC_WK",
+    "24": "WAR_WK",
+    "25": "WAR_WKKD",
+    "26": "WOD_OGR_PRZY",
+    "27": "WPIN_SIEC",
+    "28": "ZASW_KONC",
+    "29": "ZG_ODCZ",
+    "30": "ZM",
+    "31": "ZW_NADP"
   },
   "initializer_range": 0.02,
   "intermediate_size": 4096,
   "label2id": {
     "BINFO": 0,
     "DANE_ARCH": 1,
+    "DIERZ_ST_HYD": 2,
+    "INFO_DW": 3,
+    "INSP": 4,
+    "INTERW_AW_K": 5,
+    "INTERW_AW_W": 6,
+    "INTERW_ODTW": 7,
+    "INTERW_ZAP": 8,
+    "NEGOC_DESZCZ": 9,
+    "ODWOD_KS": 10,
+    "OP_PRZY_WK": 11,
+    "OP_SIEC_WK": 12,
+    "OP_UM": 13,
+    "POZYTYW": 14,
+    "POZ_SPR_WIND": 15,
+    "PRZE": 16,
+    "PYT": 17,
+    "REKLAMACJA": 18,
+    "ROW_EKSP": 19,
+    "SK": 20,
+    "UDOST_WN": 21,
+    "UM_PARTYCY": 22,
+    "UZ_SIEC_WK": 23,
+    "WAR_WK": 24,
+    "WAR_WKKD": 25,
+    "WOD_OGR_PRZY": 26,
+    "WPIN_SIEC": 27,
+    "ZASW_KONC": 28,
+    "ZG_ODCZ": 29,
+    "ZM": 30,
+    "ZW_NADP": 31
   },
   "layer_norm_eps": 1e-05,
   "max_position_embeddings": 8194,

configuration_roberta.py ADDED Viewed

	@@ -0,0 +1,151 @@

+# coding=utf-8
+# Copyright 2018 The Google AI Language Team Authors and The HuggingFace Inc. team.
+# Copyright (c) 2018, NVIDIA CORPORATION.  All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+""" RoBERTa configuration"""
+from collections import OrderedDict
+from typing import Mapping
+from transformers import PretrainedConfig
+from transformers.onnx import OnnxConfig
+from transformers.utils import logging
+logger = logging.get_logger(__name__)
+class RobertaConfig(PretrainedConfig):
+    r"""
+    This is the configuration class to store the configuration of a [`RobertaModel`] or a [`TFRobertaModel`]. It is
+    used to instantiate a RoBERTa model according to the specified arguments, defining the model architecture.
+    Instantiating a configuration with the defaults will yield a similar configuration to that of the RoBERTa
+    [FacebookAI/roberta-base](https://huggingface.co/FacebookAI/roberta-base) architecture.
+    Configuration objects inherit from [`PretrainedConfig`] and can be used to control the model outputs. Read the
+    documentation from [`PretrainedConfig`] for more information.
+    Args:
+        vocab_size (`int`, *optional*, defaults to 50265):
+            Vocabulary size of the RoBERTa model. Defines the number of different tokens that can be represented by the
+            `input_ids` passed when calling [`RobertaModel`] or [`TFRobertaModel`].
+        hidden_size (`int`, *optional*, defaults to 768):
+            Dimensionality of the encoder layers and the pooler layer.
+        num_hidden_layers (`int`, *optional*, defaults to 12):
+            Number of hidden layers in the Transformer encoder.
+        num_attention_heads (`int`, *optional*, defaults to 12):
+            Number of attention heads for each attention layer in the Transformer encoder.
+        intermediate_size (`int`, *optional*, defaults to 3072):
+            Dimensionality of the "intermediate" (often named feed-forward) layer in the Transformer encoder.
+        hidden_act (`str` or `Callable`, *optional*, defaults to `"gelu"`):
+            The non-linear activation function (function or string) in the encoder and pooler. If string, `"gelu"`,
+            `"relu"`, `"silu"` and `"gelu_new"` are supported.
+        hidden_dropout_prob (`float`, *optional*, defaults to 0.1):
+            The dropout probability for all fully connected layers in the embeddings, encoder, and pooler.
+        attention_probs_dropout_prob (`float`, *optional*, defaults to 0.1):
+            The dropout ratio for the attention probabilities.
+        max_position_embeddings (`int`, *optional*, defaults to 512):
+            The maximum sequence length that this model might ever be used with. Typically set this to something large
+            just in case (e.g., 512 or 1024 or 2048).
+        type_vocab_size (`int`, *optional*, defaults to 2):
+            The vocabulary size of the `token_type_ids` passed when calling [`RobertaModel`] or [`TFRobertaModel`].
+        initializer_range (`float`, *optional*, defaults to 0.02):
+            The standard deviation of the truncated_normal_initializer for initializing all weight matrices.
+        layer_norm_eps (`float`, *optional*, defaults to 1e-12):
+            The epsilon used by the layer normalization layers.
+        position_embedding_type (`str`, *optional*, defaults to `"absolute"`):
+            Type of position embedding. Choose one of `"absolute"`, `"relative_key"`, `"relative_key_query"`. For
+            positional embeddings use `"absolute"`. For more information on `"relative_key"`, please refer to
+            [Self-Attention with Relative Position Representations (Shaw et al.)](https://arxiv.org/abs/1803.02155).
+            For more information on `"relative_key_query"`, please refer to *Method 4* in [Improve Transformer Models
+            with Better Relative Position Embeddings (Huang et al.)](https://arxiv.org/abs/2009.13658).
+        is_decoder (`bool`, *optional*, defaults to `False`):
+            Whether the model is used as a decoder or not. If `False`, the model is used as an encoder.
+        use_cache (`bool`, *optional*, defaults to `True`):
+            Whether or not the model should return the last key/values attentions (not used by all models). Only
+            relevant if `config.is_decoder=True`.
+        classifier_dropout (`float`, *optional*):
+            The dropout ratio for the classification head.
+    Examples:
+    ```python
+    >>> from transformers import RobertaConfig, RobertaModel
+    >>> # Initializing a RoBERTa configuration
+    >>> configuration = RobertaConfig()
+    >>> # Initializing a model (with random weights) from the configuration
+    >>> model = RobertaModel(configuration)
+    >>> # Accessing the model configuration
+    >>> configuration = model.config
+    ```"""
+    model_type = "roberta"
+    def __init__(
+        self,
+        vocab_size=50265,
+        hidden_size=768,
+        num_hidden_layers=12,
+        num_attention_heads=12,
+        intermediate_size=3072,
+        hidden_act="gelu",
+        hidden_dropout_prob=0.1,
+        attention_probs_dropout_prob=0.1,
+        max_position_embeddings=512,
+        type_vocab_size=2,
+        initializer_range=0.02,
+        layer_norm_eps=1e-12,
+        pad_token_id=1,
+        bos_token_id=0,
+        eos_token_id=2,
+        position_embedding_type="absolute",
+        use_cache=True,
+        classifier_dropout=None,
+        **kwargs,
+    ):
+        super().__init__(pad_token_id=pad_token_id, bos_token_id=bos_token_id, eos_token_id=eos_token_id, **kwargs)
+        self.vocab_size = vocab_size
+        self.hidden_size = hidden_size
+        self.num_hidden_layers = num_hidden_layers
+        self.num_attention_heads = num_attention_heads
+        self.hidden_act = hidden_act
+        self.intermediate_size = intermediate_size
+        self.hidden_dropout_prob = hidden_dropout_prob
+        self.attention_probs_dropout_prob = attention_probs_dropout_prob
+        self.max_position_embeddings = max_position_embeddings
+        self.type_vocab_size = type_vocab_size
+        self.initializer_range = initializer_range
+        self.layer_norm_eps = layer_norm_eps
+        self.position_embedding_type = position_embedding_type
+        self.use_cache = use_cache
+        self.classifier_dropout = classifier_dropout
+class RobertaOnnxConfig(OnnxConfig):
+    @property
+    def inputs(self) -> Mapping[str, Mapping[int, str]]:
+        if self.task == "multiple-choice":
+            dynamic_axis = {0: "batch", 1: "choice", 2: "sequence"}
+        else:
+            dynamic_axis = {0: "batch", 1: "sequence"}
+        return OrderedDict(
+            [
+                ("input_ids", dynamic_axis),
+                ("attention_mask", dynamic_axis),
+            ]
+        )

confusion_matrix.png ADDED Viewed

Git LFS Details

SHA256: 36f81342a3a04b624b7617fd94926436b4d06efd26eeddbabbd6d2df17b9183b
Pointer size: 131 Bytes
Size of remote file: 385 kB

label_info.json ADDED Viewed

	@@ -0,0 +1,71 @@

+{
+  "label2id": {
+    "BINFO": 0,
+    "DANE_ARCH": 1,
+    "DIERZ_ST_HYD": 2,
+    "INFO_DW": 3,
+    "INSP": 4,
+    "INTERW_AW_K": 5,
+    "INTERW_AW_W": 6,
+    "INTERW_ODTW": 7,
+    "INTERW_ZAP": 8,
+    "NEGOC_DESZCZ": 9,
+    "ODWOD_KS": 10,
+    "OP_PRZY_WK": 11,
+    "OP_SIEC_WK": 12,
+    "OP_UM": 13,
+    "POZYTYW": 14,
+    "POZ_SPR_WIND": 15,
+    "PRZE": 16,
+    "PYT": 17,
+    "REKLAMACJA": 18,
+    "ROW_EKSP": 19,
+    "SK": 20,
+    "UDOST_WN": 21,
+    "UM_PARTYCY": 22,
+    "UZ_SIEC_WK": 23,
+    "WAR_WK": 24,
+    "WAR_WKKD": 25,
+    "WOD_OGR_PRZY": 26,
+    "WPIN_SIEC": 27,
+    "ZASW_KONC": 28,
+    "ZG_ODCZ": 29,
+    "ZM": 30,
+    "ZW_NADP": 31
+  },
+  "id2label": {
+    "0": "BINFO",
+    "1": "DANE_ARCH",
+    "2": "DIERZ_ST_HYD",
+    "3": "INFO_DW",
+    "4": "INSP",
+    "5": "INTERW_AW_K",
+    "6": "INTERW_AW_W",
+    "7": "INTERW_ODTW",
+    "8": "INTERW_ZAP",
+    "9": "NEGOC_DESZCZ",
+    "10": "ODWOD_KS",
+    "11": "OP_PRZY_WK",
+    "12": "OP_SIEC_WK",
+    "13": "OP_UM",
+    "14": "POZYTYW",
+    "15": "POZ_SPR_WIND",
+    "16": "PRZE",
+    "17": "PYT",
+    "18": "REKLAMACJA",
+    "19": "ROW_EKSP",
+    "20": "SK",
+    "21": "UDOST_WN",
+    "22": "UM_PARTYCY",
+    "23": "UZ_SIEC_WK",
+    "24": "WAR_WK",
+    "25": "WAR_WKKD",
+    "26": "WOD_OGR_PRZY",
+    "27": "WPIN_SIEC",
+    "28": "ZASW_KONC",
+    "29": "ZG_ODCZ",
+    "30": "ZM",
+    "31": "ZW_NADP"
+  },
+  "num_labels": 32
+}

logs/events.out.tfevents.1760994777.a5b7e37e7852.6366.0 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:4dd3ea45a35343426d035ae1baad1b59a01961d0b26d24e58528cf54a6eef493
+size 11381

logs/events.out.tfevents.1760994974.a5b7e37e7852.6366.1 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:bece3587ff7bfcfc5aa0d72203679228ae56a13e5d48d1155c164d7b756537f2
+size 551

logs/events.out.tfevents.1760995186.a5b7e37e7852.6366.2 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:9e40331f5bf5d23a204a6aa240dce982db039918788ac436fb9d944fbb36044b
+size 12208

logs/events.out.tfevents.1760995679.a5b7e37e7852.6366.3 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:cc6c58a8ce7d749156302bab297e681aa3ae4b5d8850d3ac41cbe57b1cea1dea
+size 4184

logs/events.out.tfevents.1760995793.a5b7e37e7852.20606.0 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:0a67e939f7e6f36138f7944886531c2009e73a4b3007e24974ebd40a39ca30bb
+size 8324

logs/events.out.tfevents.1760995936.a5b7e37e7852.21329.0 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:655b571215a50a6c23d29456b7c43e5c55eb7828355e1dac6b3ca912d0de1e9a
+size 11406

logs/events.out.tfevents.1760996283.a5b7e37e7852.21329.1 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:37317c2622f3532fea3fd026c622bce0013406d92306354d932b52fa6d5ccd98
+size 560

logs/events.out.tfevents.1760996482.a5b7e37e7852.21329.2 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:6e122ba20aba7bc7a35a48f3edc91f2468f0980b83fbfd46b5f233c4e675cdd4
+size 12378

logs/events.out.tfevents.1760996930.a5b7e37e7852.21329.3 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:01d1e94efab1dc7959ed8eaba516302225bfd6b7a7b3d996c36124c39bbab3f4
+size 560

logs/events.out.tfevents.1761000162.a5b7e37e7852.21329.4 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:89222f87ad44c65ad4e1d534bd8a8a3aa655471d0b2a047a000ca29d5acf4ea2
+size 4184

logs/events.out.tfevents.1761000278.a5b7e37e7852.39741.0 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:89f68038a0b7ed689e090ffef495d6a5e7dbcafed5ce19161dfaa37213e85462
+size 12378

logs/events.out.tfevents.1761000725.a5b7e37e7852.39741.1 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:f5dbbbd28fedb8a3595d0fe7b21c31e3e3c106dcddff644cd6a4831ffcda0add
+size 560

model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:76be2eae12e9d636194d8cd326570e32f11ba555b8b2d91fce90738b0666ee2a
-size 1771757024

 version https://git-lfs.github.com/spec/v1
+oid sha256:62db5e122f9f41163da858efd8df098e45b4837daa5e352df66f01195be5b08e
+size 1771740624

test_results.json ADDED Viewed

	@@ -0,0 +1,11 @@

+{
+    "epoch": 8.0,
+    "eval_accuracy": 0.7838150289017342,
+    "eval_f1": 0.760134834840537,
+    "eval_loss": 1.1006990671157837,
+    "eval_precision": 0.7630015333272686,
+    "eval_recall": 0.7838150289017342,
+    "eval_runtime": 3.2334,
+    "eval_samples_per_second": 267.523,
+    "eval_steps_per_second": 5.876
+}

tokenizer.json CHANGED Viewed

@@ -1,11 +1,6 @@
 {
   "version": "1.0",
-  "truncation": {
-    "direction": "Right",
-    "max_length": 1024,
-    "strategy": "LongestFirst",
-    "stride": 0
-  },
   "padding": null,
   "added_tokens": [
     {

 {
   "version": "1.0",
+  "truncation": null,
   "padding": null,
   "added_tokens": [
     {

tokenizer_config.json CHANGED Viewed

@@ -553,14 +553,10 @@
   "errors": "replace",
   "extra_special_tokens": {},
   "mask_token": "<mask>",
-  "max_length": 1024,
   "model_max_length": 1000000000000000019884624838656,
   "pad_token": "<pad>",
   "sep_token": "</s>",
-  "stride": 0,
   "tokenizer_class": "RobertaTokenizer",
   "trim_offsets": true,
-  "truncation_side": "right",
-  "truncation_strategy": "longest_first",
   "unk_token": "<unk>"
 }

   "errors": "replace",
   "extra_special_tokens": {},
   "mask_token": "<mask>",
   "model_max_length": 1000000000000000019884624838656,
   "pad_token": "<pad>",
   "sep_token": "</s>",
   "tokenizer_class": "RobertaTokenizer",
   "trim_offsets": true,
   "unk_token": "<unk>"
 }

train_results.json ADDED Viewed

	@@ -0,0 +1,8 @@

+{
+    "epoch": 8.0,
+    "total_flos": 2.6699656498043904e+16,
+    "train_loss": 1.6651537001132966,
+    "train_runtime": 443.5681,
+    "train_samples_per_second": 69.257,
+    "train_steps_per_second": 0.721
+}

training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:4f7548759bf7384197093ccf91ca07874e132e26bdb5f51419e2b0b1c407d859
+size 5905