Upload folder using huggingface_hub
Browse files- .ruff_cache/.gitignore +2 -0
- .ruff_cache/0.14.9/14058212920099261697 +0 -0
- .ruff_cache/CACHEDIR.TAG +1 -0
- BiLSTMClassifier.py +36 -0
- BiLSTMClassifier.safetensors +3 -0
- README.md +38 -3
- __pycache__/BiLSTMClassifier.cpython-310.pyc +0 -0
- config.json +12 -0
.ruff_cache/.gitignore
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Automatically created by ruff.
|
| 2 |
+
*
|
.ruff_cache/0.14.9/14058212920099261697
ADDED
|
Binary file (94 Bytes). View file
|
|
|
.ruff_cache/CACHEDIR.TAG
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
Signature: 8a477f597d28d172789f06886806bc55
|
BiLSTMClassifier.py
ADDED
|
@@ -0,0 +1,36 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import torch
|
| 2 |
+
import torch.nn as nn
|
| 3 |
+
|
| 4 |
+
|
| 5 |
+
class BiLSTMClassifier(nn.Module):
    """Bidirectional LSTM text classifier that emits one logit per sequence.

    Token ids are embedded, run through a (possibly stacked) bidirectional
    LSTM, and the final hidden states of the last layer's two directions are
    concatenated and projected to a single logit. Apply ``torch.sigmoid`` to
    the output to obtain a probability.
    """

    def __init__(
        self,
        vocab_size,
        embedding_dim,
        hidden_size,
        num_layers=1,
        dropout=0.2,
        **kwargs,
    ):
        """Build the embedding, LSTM and output layers.

        Args:
            vocab_size: Number of rows in the embedding table.
            embedding_dim: Size of each token embedding (LSTM input size).
            hidden_size: Hidden size of each LSTM direction.
            num_layers: Number of stacked LSTM layers.
            dropout: Dropout between LSTM layers; only used when
                ``num_layers > 1``.
            **kwargs: Accepted and ignored, so a config dict carrying extra
                metadata keys can be passed with ``**cfg``.
        """
        super().__init__()
        self.embedding = nn.Embedding(vocab_size, embedding_dim)

        # nn.LSTM warns when dropout is non-zero with a single layer, because
        # inter-layer dropout has nothing to act on; pass 0.0 in that case.
        inter_layer_dropout = dropout if num_layers > 1 else 0.0
        self.lstm = nn.LSTM(
            input_size=embedding_dim,
            hidden_size=hidden_size,
            num_layers=num_layers,
            batch_first=True,
            bidirectional=True,
            dropout=inter_layer_dropout,
        )

        # Forward and backward final states are concatenated -> 2 * hidden_size.
        self.fc = nn.Linear(hidden_size * 2, 1)

    def forward(self, x, lengths=None):
        """Compute one logit per input sequence.

        Args:
            x: Integer tensor of token ids, batch-first
                (``(batch, seq_len)``).
            lengths: Optional true (unpadded) length of every sequence in
                ``x``, as a list or 1-D integer tensor. When given, the batch
                is packed so that right-padding does not leak into the final
                hidden states. When omitted, every position of ``x`` is fed
                to the LSTM (the original behaviour).

        Returns:
            Tensor of shape ``(batch, 1)`` holding raw logits.
        """
        embedded = self.embedding(x)

        if lengths is not None:
            # pack_padded_sequence requires the lengths on the CPU as int64.
            cpu_lengths = torch.as_tensor(lengths, dtype=torch.int64).cpu()
            embedded = nn.utils.rnn.pack_padded_sequence(
                embedded,
                cpu_lengths,
                batch_first=True,
                enforce_sorted=False,
            )

        _, (h_n, _) = self.lstm(embedded)

        # h_n is (num_layers * 2, batch, hidden_size); the last two entries
        # are the top layer's forward and backward final states.
        h_final = torch.cat((h_n[-2], h_n[-1]), dim=1)
        return self.fc(h_final)
|
BiLSTMClassifier.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2c835a0dfca19f68ba9f319433eff472c9adf9716fd19807cdd7a262d7045aff
|
| 3 |
+
size 17618180
|
README.md
CHANGED
|
@@ -1,3 +1,38 @@
|
|
| 1 |
-
|
| 2 |
-
|
| 3 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# BiLSTM Text Classifier
|
| 2 |
+
|
| 3 |
+
A simple BiLSTM model implemented in PyTorch, trained for spam detection on the SMS Spam Collection dataset (Almeida, Tiago and José Hidalgo. 2011. SMS Spam Collection. UCI Machine Learning Repository. https://doi.org/10.24432/C5CC84.).
|
| 4 |
+
|
| 5 |
+
## Important Notes
|
| 6 |
+
- The model returns logits as output; to obtain a probability, pass the output through `torch.sigmoid`.
|
| 7 |
+
- The model uses the `bert-base-uncased` tokenizer.
|
| 8 |
+
|
| 9 |
+
## Files
|
| 10 |
+
- `BiLSTMClassifier.safetensors`: trained weights
|
| 11 |
+
- `BiLSTMClassifier.py`: model definition
|
| 12 |
+
- `config.json`: hyperparameters
|
| 13 |
+
|
| 14 |
+
## Usage
|
| 15 |
+
|
| 16 |
+
```python
|
| 17 |
+
import json
|
| 18 |
+
import torch
|
| 19 |
+
from transformers import BertTokenizer
|
| 20 |
+
from safetensors.torch import load_file
|
| 21 |
+
from BiLSTMClassifier import BiLSTMClassifier
|
| 22 |
+
|
| 23 |
+
with open("config.json") as f:
|
| 24 |
+
cfg = json.load(f)
|
| 25 |
+
|
| 26 |
+
model = BiLSTMClassifier(**cfg)
|
| 27 |
+
|
| 28 |
+
state_dict = load_file("BiLSTMClassifier.safetensors")
|
| 29 |
+
model.load_state_dict(state_dict)
|
| 30 |
+
model.eval()
|
| 31 |
+
|
| 32 |
+
sample_text = "URGENT HIRING! Earn $500/day working from home. No experience needed. Apply here: www.somenthing.io/hiring"
|
| 33 |
+
|
| 34 |
+
tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")
|
| 35 |
+
tokens = tokenizer(sample_text, return_tensors="pt")
|
| 36 |
+
logits = model(tokens["input_ids"])
|
| 37 |
+
p = torch.sigmoid(logits)
|
| 38 |
+
```
|
__pycache__/BiLSTMClassifier.cpython-310.pyc
ADDED
|
Binary file (1.21 kB). View file
|
|
|
config.json
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"model_type": "bilstm",
|
| 3 |
+
"framework": "pytorch",
|
| 4 |
+
"task": "text-classification",
|
| 5 |
+
"vocab_size": 30522,
|
| 6 |
+
"embedding_dim": 128,
|
| 7 |
+
"hidden_size": 64,
|
| 8 |
+
"num_layers": 5,
|
| 9 |
+
"bidirectional": true,
|
| 10 |
+
"dropout": 0.2,
|
| 11 |
+
"num_classes": 2
|
| 12 |
+
}
|