poltextlab commited on
Commit
cf6a45a
·
verified ·
1 Parent(s): e46f19f

Automated hub push by babel_finetune_agent

Browse files
Files changed (5) hide show
  1. README.md +82 -0
  2. config.json +36 -0
  3. finetune_config.json +9 -0
  4. model.safetensors +3 -0
  5. training_args.bin +3 -0
README.md ADDED
@@ -0,0 +1,82 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ model-index:
3
+ - name: poltextlab/xlm-roberta-large-ineq-binary-v6
4
+ results:
5
+ - task:
6
+ type: text-classification
7
+ metrics:
8
+ - name: Accuracy
9
+ type: accuracy
10
+ value: N/A
11
+ - name: F1-Score
12
+ type: f1
13
+ value: 0.46
14
+ tags:
15
+ - text-classification
16
+ - pytorch
17
+ metrics:
18
+ - precision
19
+ - recall
20
+ - f1-score
21
+ language:
22
+ - en
23
+ base_model:
24
+ - xlm-roberta-large
25
+ pipeline_tag: text-classification
26
+ library_name: transformers
27
+ license: cc-by-4.0
28
+ extra_gated_prompt: Our models are intended for academic use only. If you are not
29
+ affiliated with an academic institution, please provide a rationale for using our
30
+ models. Please allow us a few business days to manually review subscriptions.
31
+ extra_gated_fields:
32
+ Name: text
33
+ Country: country
34
+ Institution: text
35
+ Institution Email: text
36
+ Please specify your academic use case: text
37
+ ---
38
+
39
+ # xlm-roberta-large-ineq-binary-v6
40
+
41
+
42
+ # How to use the model
43
+
44
+ ```python
45
+ from transformers import AutoTokenizer, pipeline
46
+
47
+ tokenizer = AutoTokenizer.from_pretrained("xlm-roberta-large")
48
+ pipe = pipeline(
49
+ model="poltextlab/xlm-roberta-large-ineq-binary-v6",
50
+ task="text-classification",
51
+ tokenizer=tokenizer,
52
+ use_fast=False,
53
+ token="<your_hf_read_only_token>"
54
+ )
55
+
56
+ text = "<text_to_classify>"
57
+ pipe(text)
58
+ ```
59
+
60
+
61
+ # Classification Report
62
+
63
+ ## Overall Performance:
64
+
65
+ * **Accuracy:** N/A
66
+ * **Macro Avg:** Precision: 0.70, Recall: 0.56, F1-score: 0.46
67
+ * **Weighted Avg:** Precision: 0.70, Recall: 0.56, F1-score: 0.46
68
+
69
+ ## Per-Class Metrics:
70
+
71
+ | Label | Precision | Recall | F1-score | Support |
72
+ |:---------------------------|------------:|---------:|-----------:|----------:|
73
+ | (0) Not inequality related | 0.53 | 0.98 | 0.69 | 51 |
74
+ | (1) Inequality related | 0.88 | 0.14 | 0.24 | 51 |
75
+
76
+ # Inference platform
77
+ This model is used by the [CAP Babel Machine](https://babel.poltextlab.com), an open-source and free natural language processing tool, designed to simplify and speed up projects for comparative research.
78
+
79
+ # Cooperation
80
+ Model performance can be significantly improved by extending our training sets. We appreciate every submission of CAP-coded corpora (of any domain and language) at poltextlab{at}poltextlab{dot}com or by using the [CAP Babel Machine](https://babel.poltextlab.com).
81
+ ## Debugging and issues
82
+ This architecture uses the `sentencepiece` tokenizer. To run the model with `transformers` versions earlier than 4.27, you need to install `sentencepiece` manually.
config.json ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "XLMRobertaForSequenceClassification"
4
+ ],
5
+ "attention_probs_dropout_prob": 0.1,
6
+ "bos_token_id": 0,
7
+ "classifier_dropout": null,
8
+ "eos_token_id": 2,
9
+ "hidden_act": "gelu",
10
+ "hidden_dropout_prob": 0.1,
11
+ "hidden_size": 1024,
12
+ "id2label": {
13
+ "0": "Not inequality related",
14
+ "1": "Inequality related"
15
+ },
16
+ "initializer_range": 0.02,
17
+ "intermediate_size": 4096,
18
+ "label2id": {
19
+ "Inequality related": 1,
20
+ "Not inequality related": 0
21
+ },
22
+ "layer_norm_eps": 1e-05,
23
+ "max_position_embeddings": 514,
24
+ "model_type": "xlm-roberta",
25
+ "num_attention_heads": 16,
26
+ "num_hidden_layers": 24,
27
+ "output_past": true,
28
+ "pad_token_id": 1,
29
+ "position_embedding_type": "absolute",
30
+ "problem_type": "multi_label_classification",
31
+ "torch_dtype": "float32",
32
+ "transformers_version": "4.51.3",
33
+ "type_vocab_size": 1,
34
+ "use_cache": true,
35
+ "vocab_size": 250002
36
+ }
finetune_config.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model_path": "xlm-roberta-large",
3
+ "model_type": "encoder",
4
+ "learning_rate": 5e-07,
5
+ "epochs": 15,
6
+ "batch_size": 16,
7
+ "max_seq_length": 256,
8
+ "domain": "migration"
9
+ }
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fb7a7a7bf256f0249a7afab2b25b9fbb730cf17d10824d1a812e8238d8afc328
3
+ size 2239618672
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:227d1389719a6b2cf66e78bc885315e480f46d5ad8ba37892e69c2ff10c77aa6
3
+ size 5713