St3w31 commited on
Commit
1481eeb
·
verified ·
1 Parent(s): ff1b84d

Upload folder using huggingface_hub

Browse files
.ruff_cache/.gitignore ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ # Automatically created by ruff.
2
+ *
.ruff_cache/0.14.9/14058212920099261697 ADDED
Binary file (94 Bytes). View file
 
.ruff_cache/CACHEDIR.TAG ADDED
@@ -0,0 +1 @@
 
 
1
+ Signature: 8a477f597d28d172789f06886806bc55
BiLSTMClassifier.py ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ import torch.nn as nn
3
+
4
+
5
class BiLSTMClassifier(nn.Module):
    """Bidirectional LSTM classifier that produces one logit per sequence.

    Token ids are embedded, run through a (possibly stacked) bidirectional
    LSTM, and the last layer's final forward and backward hidden states are
    concatenated and projected to a single logit by a linear layer.

    The output is a raw logit, not a probability.
    """

    def __init__(
        self,
        vocab_size: int,
        embedding_dim: int,
        hidden_size: int,
        num_layers: int = 1,
        dropout: float = 0.2,
        **kwargs,
    ):
        """Build the embedding, LSTM and output layers.

        Args:
            vocab_size: Number of rows in the embedding table.
            embedding_dim: Size of each token embedding (LSTM input size).
            hidden_size: Hidden size of the LSTM, per direction.
            num_layers: Number of stacked LSTM layers.
            dropout: Dropout between stacked LSTM layers. Only passed to the
                LSTM when ``num_layers > 1``; otherwise 0.0 is used.
            **kwargs: Accepted and ignored, so a configuration dict carrying
                extra keys can be unpacked straight into the constructor.
        """
        super().__init__()
        self.embedding = nn.Embedding(vocab_size, embedding_dim)

        self.lstm = nn.LSTM(
            input_size=embedding_dim,
            hidden_size=hidden_size,
            num_layers=num_layers,
            batch_first=True,
            bidirectional=True,
            # Inter-layer dropout is meaningless for a single layer, so it is
            # forced to zero in that case.
            dropout=dropout if num_layers > 1 else 0.0,
        )

        # Forward and backward final states are concatenated -> 2 * hidden.
        self.fc = nn.Linear(hidden_size * 2, 1)

    def forward(self, x: torch.Tensor, lengths: "torch.Tensor | None" = None):
        """Compute one logit per input sequence.

        Args:
            x: Integer token ids, batch-first (the LSTM is built with
                ``batch_first=True``), i.e. ``(batch, seq_len)``.
            lengths: Optional number of real (non-padding) tokens in each
                sequence, one entry per batch row; assumes right-padded input.
                When given, the batch is packed so the final hidden states are
                taken at each sequence's last real token instead of after the
                padding. When ``None`` (default) the whole width of ``x`` is
                processed, exactly as before this argument existed.

        Returns:
            Tensor of shape ``(batch, 1)`` holding raw logits.
        """
        lstm_input = self.embedding(x)

        if lengths is not None:
            # pack_padded_sequence needs the lengths as a CPU int64 tensor.
            cpu_lengths = torch.as_tensor(lengths, dtype=torch.int64).cpu()
            lstm_input = nn.utils.rnn.pack_padded_sequence(
                lstm_input,
                cpu_lengths,
                batch_first=True,
                enforce_sorted=False,
            )

        # Only the final hidden state is needed; per-step outputs and the
        # cell state are discarded.
        _, (h_n, _) = self.lstm(lstm_input)

        # For a bidirectional LSTM the last two entries of h_n belong to the
        # top layer: forward direction first, then backward.
        h_fwd = h_n[-2, :, :]
        h_bwd = h_n[-1, :, :]
        h_final = torch.cat((h_fwd, h_bwd), dim=1)
        return self.fc(h_final)
BiLSTMClassifier.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2c835a0dfca19f68ba9f319433eff472c9adf9716fd19807cdd7a262d7045aff
3
+ size 17618180
README.md CHANGED
@@ -1,3 +1,38 @@
1
- ---
2
- license: mit
3
- ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # BiLSTM Text Classifier
2
+
3
+ A simple BiLSTM model implemented in PyTorch, trained for spam detection on the SMS Spam Collection (Almeida, Tiago and José Hidalgo. 2011. SMS Spam Collection. UCI Machine Learning Repository. https://doi.org/10.24432/C5CC84).
4
+
5
+ ## Important Notes
6
+ - The model returns raw logits; to obtain a probability, pass the output through `torch.sigmoid`.
7
+ - The model uses the `bert-base-uncased` tokenizer.
8
+
9
+ ## Files
10
+ - `BiLSTMClassifier.safetensors`: trained weights
11
+ - `BiLSTMClassifier.py`: model definition
12
+ - `config.json`: hyperparameters
13
+
14
+ ## Usage
15
+
16
+ ```python
17
+ import json
18
+ import torch
19
+ from transformers import BertTokenizer
20
+ from safetensors.torch import load_file
21
+ from BiLSTMClassifier import BiLSTMClassifier
22
+
23
+ with open("config.json") as f:
24
+ cfg = json.load(f)
25
+
26
+ model = BiLSTMClassifier(**cfg)
27
+
28
+ state_dict = load_file("BiLSTMClassifier.safetensors")
29
+ model.load_state_dict(state_dict)
30
+ model.eval()
31
+
32
+ sample_text = "URGENT HIRING! Earn $500/day working from home. No experience needed. Apply here: www.somenthing.io/hiring"
33
+
34
+ tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")
35
+ tokens = tokenizer(sample_text, return_tensors="pt")
36
+ logits = model(tokens["input_ids"])
37
+ p = torch.sigmoid(logits)
38
+ ```
__pycache__/BiLSTMClassifier.cpython-310.pyc ADDED
Binary file (1.21 kB). View file
 
config.json ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model_type": "bilstm",
3
+ "framework": "pytorch",
4
+ "task": "text-classification",
5
+ "vocab_size": 30522,
6
+ "embedding_dim": 128,
7
+ "hidden_size": 64,
8
+ "num_layers": 5,
9
+ "bidirectional": true,
10
+ "dropout": 0.2,
11
+ "num_classes": 2
12
+ }