poltextlab commited on
Commit
b6bbac7
·
verified ·
1 Parent(s): c4f2326

initial commit

Browse files
Files changed (4) hide show
  1. README.md +83 -0
  2. config.json +38 -0
  3. finetune_config.json +9 -0
  4. model.safetensors +3 -0
README.md ADDED
@@ -0,0 +1,83 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ model-index:
3
+ - name: poltextlab/finetune-agent-prod
4
+ results:
5
+ - task:
6
+ type: text-classification
7
+ metrics:
8
+ - name: Accuracy
9
+ type: accuracy
10
+ value: N/A
11
+ - name: F1-Score
12
+ type: f1
13
+ value: 0.86
14
+ tags:
15
+ - text-classification
16
+ - pytorch
17
+ metrics:
18
+ - precision
19
+ - recall
20
+ - f1-score
21
+ language:
22
+ - en
23
+ base_model:
24
+ - xlm-roberta-large
25
+ pipeline_tag: text-classification
26
+ library_name: transformers
27
+ license: cc-by-4.0
28
+ extra_gated_prompt: Our models are intended for academic use only. If you are not
29
+ affiliated with an academic institution, please provide a rationale for using our
30
+ models. Please allow us a few business days to manually review subscriptions.
31
+ extra_gated_fields:
32
+ Name: text
33
+ Country: country
34
+ Institution: text
35
+ Institution Email: text
36
+ Please specify your academic use case: text
37
+ ---
38
+
39
+ # finetune-agent-prod
40
+
41
+
42
+ # How to use the model
43
+
44
+ ```python
45
+ from transformers import AutoTokenizer, pipeline
46
+
47
+ tokenizer = AutoTokenizer.from_pretrained("xlm-roberta-large")
48
+ pipe = pipeline(
49
+ model="poltextlab/finetune-agent-prod",
50
+ task="text-classification",
51
+ tokenizer=tokenizer,
52
+ use_fast=False,
53
+ token="<your_hf_read_only_token>"
54
+ )
55
+
56
+ text = "<text_to_classify>"
57
+ pipe(text)
58
+ ```
59
+
60
+
61
+ # Classification Report
62
+
63
+ ## Overall Performance:
64
+
65
+ * **Accuracy:** N/A
66
+ * **Macro Avg:** Precision: 0.86, Recall: 0.86, F1-score: 0.86
67
+ * **Weighted Avg:** Precision: 0.86, Recall: 0.86, F1-score: 0.86
68
+
69
+ ## Per-Class Metrics:
70
+
71
+ | Label | Precision | Recall | F1-score | Support |
72
+ |:----------------------------------------|------------:|---------:|-----------:|----------:|
73
+ | (0_0) Procedural | 1.00 | 0.94 | 0.97 | 35 |
74
+ | (0_1) Commemorative / one-minute speech | 0.78 | 0.88 | 0.83 | 33 |
75
+ | (1_1) Relevant | 0.8 | 0.75 | 0.77 | 32 |
76
+
77
+ # Inference platform
78
+ This model is used by the [CAP Babel Machine](https://babel.poltextlab.com), an open-source and free natural language processing tool, designed to simplify and speed up projects for comparative research.
79
+
80
+ # Cooperation
81
+ Model performance can be significantly improved by extending our training sets. We appreciate every submission of CAP-coded corpora (of any domain and language) at poltextlab{at}poltextlab{dot}com or by using the [CAP Babel Machine](https://babel.poltextlab.com).
82
+ ## Debugging and issues
83
+ This architecture uses the `sentencepiece` tokenizer. When running the model with `transformers` versions earlier than 4.27, you need to install `sentencepiece` manually.
config.json ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "XLMRobertaForSequenceClassification"
4
+ ],
5
+ "attention_probs_dropout_prob": 0.1,
6
+ "bos_token_id": 0,
7
+ "classifier_dropout": null,
8
+ "eos_token_id": 2,
9
+ "hidden_act": "gelu",
10
+ "hidden_dropout_prob": 0.1,
11
+ "hidden_size": 1024,
12
+ "id2label": {
13
+ "0": "LABEL_0",
14
+ "1": "LABEL_1",
15
+ "2": "LABEL_2"
16
+ },
17
+ "initializer_range": 0.02,
18
+ "intermediate_size": 4096,
19
+ "label2id": {
20
+ "LABEL_0": 0,
21
+ "LABEL_1": 1,
22
+ "LABEL_2": 2
23
+ },
24
+ "layer_norm_eps": 1e-05,
25
+ "max_position_embeddings": 514,
26
+ "model_type": "xlm-roberta",
27
+ "num_attention_heads": 16,
28
+ "num_hidden_layers": 24,
29
+ "output_past": true,
30
+ "pad_token_id": 1,
31
+ "position_embedding_type": "absolute",
32
+ "problem_type": "multi_label_classification",
33
+ "torch_dtype": "float32",
34
+ "transformers_version": "4.51.3",
35
+ "type_vocab_size": 1,
36
+ "use_cache": true,
37
+ "vocab_size": 250002
38
+ }
finetune_config.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model_path": "xlm-roberta-large",
3
+ "model_type": "encoder",
4
+ "learning_rate": 2e-05,
5
+ "epochs": 3,
6
+ "batch_size": 16,
7
+ "max_seq_length": 128,
8
+ "domain": "migration"
9
+ }
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fc6c05ef4d10033af329668dbe9cf175502dd0bdd78d6326c21dc7ddded098d8
3
+ size 2239622772