poltextlab commited on
Commit
7352fe8
·
verified ·
1 Parent(s): 3a75bd3
.ipynb_checkpoints/config-checkpoint.json ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "XLMRobertaForSequenceClassification"
4
+ ],
5
+ "attention_probs_dropout_prob": 0.1,
6
+ "bos_token_id": 0,
7
+ "classifier_dropout": null,
8
+ "eos_token_id": 2,
9
+ "hidden_act": "gelu",
10
+ "hidden_dropout_prob": 0.1,
11
+ "hidden_size": 1024,
12
+ "id2label": {
13
+ "0": "710",
14
+ "1": "720",
15
+ "2": "721",
16
+ "3": "722",
17
+ "4": "723",
18
+ "5": "724",
19
+ "6": "730",
20
+ "7": "740",
21
+ "8": "799"
22
+ },
23
+ "initializer_range": 0.02,
24
+ "intermediate_size": 4096,
25
+ "label2id": {
26
+ "710": 0,
27
+ "720": 1,
28
+ "721": 2,
29
+ "722": 3,
30
+ "723": 4,
31
+ "724": 5,
32
+ "730": 6,
33
+ "740": 7,
34
+ "799": 8
35
+ },
36
+ "layer_norm_eps": 1e-05,
37
+ "max_position_embeddings": 514,
38
+ "model_type": "xlm-roberta",
39
+ "num_attention_heads": 16,
40
+ "num_hidden_layers": 24,
41
+ "output_past": true,
42
+ "pad_token_id": 1,
43
+ "position_embedding_type": "absolute",
44
+ "problem_type": "single_label_classification",
45
+ "torch_dtype": "float32",
46
+ "transformers_version": "4.51.3",
47
+ "type_vocab_size": 1,
48
+ "use_cache": true,
49
+ "vocab_size": 250002
50
+ }
README.md ADDED
@@ -0,0 +1,89 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ model-index:
3
+ - name: poltextlab/illframes-climate-v5
4
+ results:
5
+ - task:
6
+ type: text-classification
7
+ metrics:
8
+ - name: Accuracy
9
+ type: accuracy
10
+ value: 72%
11
+ - name: F1-Score
12
+ type: f1
13
+ value: 64%
14
+ tags:
15
+ - text-classification
16
+ - pytorch
17
+ metrics:
18
+ - precision
19
+ - recall
20
+ - f1-score
21
+ language:
22
+ - en
23
+ base_model:
24
+ - xlm-roberta-large
25
+ pipeline_tag: text-classification
26
+ library_name: transformers
27
+ license: cc-by-4.0
28
+ extra_gated_prompt: Our models are intended for academic use only. If you are not
29
+ affiliated with an academic institution, please provide a rationale for using our
30
+ models. Please allow us a few business days to manually review subscriptions.
31
+ extra_gated_fields:
32
+ Name: text
33
+ Country: country
34
+ Institution: text
35
+ Institution Email: text
36
+ Please specify your academic use case: text
37
+ ---
38
+
39
+ # illframes-climate-v5
40
+
41
+
42
+ # How to use the model
43
+
44
+ ```python
45
+ from transformers import AutoTokenizer, pipeline
46
+
47
+ tokenizer = AutoTokenizer.from_pretrained("xlm-roberta-large")
48
+ pipe = pipeline(
49
+ model="poltextlab/illframes-climate-v5",
50
+ task="text-classification",
51
+ tokenizer=tokenizer,
52
+ use_fast=False,
53
+ token="<your_hf_read_only_token>"
54
+ )
55
+
56
+ text = "<text_to_classify>"
57
+ pipe(text)
58
+ ```
59
+
60
+
61
+ # Classification Report
62
+
63
+ ## Overall Performance:
64
+
65
+ * **Accuracy:** 72%
66
+ * **Macro Avg:** Precision: 0.45, Recall: 0.29, F1-score: 0.31
67
+ * **Weighted Avg:** Precision: 0.65, Recall: 0.72, F1-score: 0.64
68
+
69
+ ## Per-Class Metrics:
70
+
71
+ | Label | Precision | Recall | F1-score | Support |
72
+ |:-----------------------------------------|------------:|---------:|-----------:|----------:|
73
+ | 710: Threatening economic growth | 0.63 | 0.3 | 0.41 | 63 |
74
+ | 720: Threatening national sovereignty | 1 | 0.15 | 0.26 | 20 |
75
+ | 721: Climate conspiracy | 0 | 0 | 0 | 15 |
76
+ | 722: Scientific scepticism and denial | 0 | 0 | 0 | 19 |
77
+ | 723: Climate movement bashing | 0.33 | 0.28 | 0.3 | 18 |
78
+ | 724: Other polluters as the real problem | 0.77 | 0.8 | 0.78 | 25 |
79
+ | 730: Threatening energy security | 0.6 | 0.09 | 0.16 | 33 |
80
+ | 740: Threatening way of life | 0 | 0 | 0 | 11 |
81
+ | 799: None of them | 0.73 | 0.99 | 0.84 | 356 |
82
+
83
+ # Inference platform
84
+ This model is used by the [CAP Babel Machine](https://babel.poltextlab.com), an open-source and free natural language processing tool, designed to simplify and speed up projects for comparative research.
85
+
86
+ # Cooperation
87
+ Model performance can be significantly improved by extending our training sets. We appreciate every submission of CAP-coded corpora (of any domain and language) at poltextlab{at}poltextlab{dot}com or by using the [CAP Babel Machine](https://babel.poltextlab.com).
88
+ ## Debugging and issues
89
+ This architecture uses the `sentencepiece` tokenizer. If you are running the model with a `transformers` version earlier than 4.27, you need to install `sentencepiece` manually.
config.json ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "XLMRobertaForSequenceClassification"
4
+ ],
5
+ "attention_probs_dropout_prob": 0.1,
6
+ "bos_token_id": 0,
7
+ "classifier_dropout": null,
8
+ "eos_token_id": 2,
9
+ "hidden_act": "gelu",
10
+ "hidden_dropout_prob": 0.1,
11
+ "hidden_size": 1024,
12
+ "id2label": {
13
+ "0": "710",
14
+ "1": "720",
15
+ "2": "721",
16
+ "3": "722",
17
+ "4": "723",
18
+ "5": "724",
19
+ "6": "730",
20
+ "7": "740",
21
+ "8": "799"
22
+ },
23
+ "initializer_range": 0.02,
24
+ "intermediate_size": 4096,
25
+ "label2id": {
26
+ "710": 0,
27
+ "720": 1,
28
+ "721": 2,
29
+ "722": 3,
30
+ "723": 4,
31
+ "724": 5,
32
+ "730": 6,
33
+ "740": 7,
34
+ "799": 8
35
+ },
36
+ "layer_norm_eps": 1e-05,
37
+ "max_position_embeddings": 514,
38
+ "model_type": "xlm-roberta",
39
+ "num_attention_heads": 16,
40
+ "num_hidden_layers": 24,
41
+ "output_past": true,
42
+ "pad_token_id": 1,
43
+ "position_embedding_type": "absolute",
44
+ "problem_type": "single_label_classification",
45
+ "torch_dtype": "float32",
46
+ "transformers_version": "4.51.3",
47
+ "type_vocab_size": 1,
48
+ "use_cache": true,
49
+ "vocab_size": 250002
50
+ }
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3cbf987e75ce17446e7cc4488b0d7f0da5a2e8ff9833bdd7d0e584dcaa4723b8
3
+ size 2239647372
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dda67c55d5246d9e910cec27564d3b8b11ded99650d625e2b7f1f5af3c90defe
3
+ size 5777