Fernando Carneiro committed on
Commit
07fab57
·
1 Parent(s): 966dd59
Files changed (1) hide show
  1. README.md +21 -2
README.md CHANGED
@@ -9,13 +9,14 @@ Having the same architecture of [BERTweet](https://huggingface.co/docs/transform
9
 
10
  ## Usage
11
 
 
 
12
  ```python
13
  import torch
14
  from transformers import AutoModel, AutoTokenizer
15
 
16
  model = AutoModel.from_pretrained('melll-uff/bertweetbr')
17
-
18
- tokenizer = AutoTokenizer.from_pretrained('melll-uff/bertweetbr')
19
 
20
  # INPUT TWEET IS ALREADY NORMALIZED!
21
  line = "Tem vídeo novo no canal do @USER :rosto_sorridente_com_olhos_de_coração: Passem por lá e confiram : HTTPURL"
@@ -24,4 +25,22 @@ input_ids = tokenizer(line, return_tensors="pt")
24
 
25
  with torch.no_grad():
26
      features = model(**input_ids) # Model outputs are now tuples
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
27
  ```
 
9
 
10
  ## Usage
11
 
12
+ ### Normalized Inputs
13
+
14
  ```python
15
  import torch
16
  from transformers import AutoModel, AutoTokenizer
17
 
18
  model = AutoModel.from_pretrained('melll-uff/bertweetbr')
19
+ tokenizer = AutoTokenizer.from_pretrained('melll-uff/bertweetbr', normalization=False)
 
20
 
21
  # INPUT TWEET IS ALREADY NORMALIZED!
22
  line = "Tem vídeo novo no canal do @USER :rosto_sorridente_com_olhos_de_coração: Passem por lá e confiram : HTTPURL"
 
25
 
26
  with torch.no_grad():
27
      features = model(**input_ids) # Model outputs are now tuples
28
+ ```
29
+
30
+ ### Normalize raw input Tweets
31
+
32
+ ```python
33
+ import torch
34
+ from transformers import AutoModel, AutoTokenizer
35
+
36
+
37
+
38
+ from transformers import pipeline
39
+
40
+ model_name = 'melll-uff/bertweetbr'
41
+ tokenizer = AutoTokenizer.from_pretrained('melll-uff/bertweetbr', normalization=False)
42
+
43
+ filler_mask = pipeline("fill-mask", model=model_name, tokenizer=tokenizer)
44
+
45
+ filler_mask("Rio é a <mask> cidade do Brasil.", top_k=5)
46
  ```