Update app.py
Browse files
app.py
CHANGED
|
@@ -18,19 +18,19 @@ model = BertForSequenceClassification.from_pretrained("NimaKL/spamd_model")
|
|
| 18 |
token_id = []
|
| 19 |
attention_masks = []
|
| 20 |
def preprocessing(input_text, tokenizer):
|
| 21 |
-
|
| 22 |
Returns <class transformers.tokenization_utils_base.BatchEncoding> with the following fields:
|
| 23 |
- input_ids: list of token ids
|
| 24 |
- token_type_ids: list of token type ids
|
| 25 |
- attention_mask: list of indices (0,1) specifying which tokens should be considered by the model (return_attention_mask = True).
|
| 26 |
-
|
| 27 |
return tokenizer.encode_plus(
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
|
| 34 |
)
|
| 35 |
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
|
| 36 |
with col1:
|
|
|
|
| 18 |
# Module-level list, created empty; nothing in the visible hunk appends to it.
# NOTE(review): presumably meant to collect encoded input ids — confirm against the rest of app.py.
token_id = []
|
| 19 |
# Module-level list, created empty; nothing in the visible hunk appends to it.
# NOTE(review): presumably meant to collect attention-mask tensors — confirm against the rest of app.py.
attention_masks = []
|
| 20 |
def preprocessing(input_text, tokenizer):
    '''
    Encode ``input_text`` as a fixed-length (32 token) PyTorch input.

    The text is wrapped in the tokenizer's special tokens, then padded or
    truncated to exactly 32 tokens.

    Args:
        input_text: the text to encode.
        tokenizer: object exposing ``encode_plus`` (a Hugging Face tokenizer).

    Returns <class transformers.tokenization_utils_base.BatchEncoding> with the following fields:
      - input_ids: list of token ids
      - token_type_ids: list of token type ids
      - attention_mask: list of indices (0,1) specifying which tokens should be considered by the model (return_attention_mask = True).
    '''
    return tokenizer.encode_plus(
        input_text,
        add_special_tokens=True,
        max_length=32,
        # `pad_to_max_length=True` is deprecated; this is its documented replacement.
        padding='max_length',
        # Make the cut-off at `max_length` explicit instead of relying on the
        # tokenizer's warn-and-fallback behaviour when no strategy is given.
        truncation=True,
        return_attention_mask=True,
        return_tensors='pt',
    )
|
| 35 |
# Run on the GPU when CUDA is usable, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
|
| 36 |
with col1:
|