MarkProMaster229 committed on
Commit
dfc77b3
·
verified ·
1 Parent(s): 47bf347

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +63 -4
README.md CHANGED
@@ -2,8 +2,19 @@
2
  license: apache-2.0
3
  ---
4
  ```python
 
 
 
 
 
 
 
 
 
 
 
5
  class TransformerBlock(nn.Module):
6
- def __init__(self, sizeVector=256, numHeads=8, dropout=0.1):
7
  super().__init__()
8
  self.ln1 = nn.LayerNorm(sizeVector)
9
  self.attn = nn.MultiheadAttention(sizeVector, numHeads, batch_first=True)
@@ -24,9 +35,8 @@ class TransformerBlock(nn.Module):
24
  x = x + self.ff(self.ln2(x))
25
  return x
26
 
27
-
28
  class TransformerRun(nn.Module):
29
- def __init__(self, vocabSize=120000, maxLen=100, sizeVector=256, numBlocks=4, numHeads=8, numClasses=3, dropout=0.1):
30
  super().__init__()
31
  self.token_emb = nn.Embedding(vocabSize, sizeVector)
32
  self.pos_emb = nn.Embedding(maxLen, sizeVector)
@@ -52,4 +62,53 @@ class TransformerRun(nn.Module):
52
  combined = torch.cat([cls_token, mean_pool], dim=1)
53
  combined = self.ln(self.dropout(combined))
54
  logits = self.classifier(combined)
55
- return logits
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
  license: apache-2.0
3
  ---
4
  ```python
5
import torch
import torch.nn as nn
from transformers import PreTrainedTokenizerFast
from huggingface_hub import hf_hub_download

# Hub repository hosting the trained classifier's artifacts.
repo_id = "MarkProMaster229/ClassificationSmall"

# Fetch (or reuse from the local HF cache) everything needed to rebuild
# the model and its tokenizer: weights, fast-tokenizer spec, and vocab.
weights_path = hf_hub_download(repo_id, filename="model_weights.pth")
tokenizer_path = hf_hub_download(repo_id, filename="tokenizer.json")
vocab_path = hf_hub_download(repo_id, filename="vocab.txt")
16
  class TransformerBlock(nn.Module):
17
+ def __init__(self, sizeVector=256, numHeads=8, dropout=0.5):
18
  super().__init__()
19
  self.ln1 = nn.LayerNorm(sizeVector)
20
  self.attn = nn.MultiheadAttention(sizeVector, numHeads, batch_first=True)
 
35
  x = x + self.ff(self.ln2(x))
36
  return x
37
 
 
38
  class TransformerRun(nn.Module):
39
+ def __init__(self, vocabSize=120000, maxLen=100, sizeVector=256, numBlocks=4, numHeads=8, numClasses=3, dropout=0.5):
40
  super().__init__()
41
  self.token_emb = nn.Embedding(vocabSize, sizeVector)
42
  self.pos_emb = nn.Embedding(maxLen, sizeVector)
 
62
  combined = torch.cat([cls_token, mean_pool], dim=1)
63
  combined = self.ln(self.dropout(combined))
64
  logits = self.classifier(combined)
65
+ return logits
66
+
67
+ config_dict = {
68
+ 'vocabSize': 119547,
69
+ 'maxLong': 100,
70
+ 'sizeVector': 256,
71
+ 'numLayers': 4,
72
+ 'numHeads': 8,
73
+ 'numClasses': 3
74
+ }
75
+
76
+ model = TransformerRun(
77
+ vocabSize=config_dict['vocabSize'],
78
+ maxLen=config_dict['maxLong'],
79
+ sizeVector=config_dict['sizeVector'],
80
+ numBlocks=config_dict['numLayers'],
81
+ numHeads=config_dict['numHeads'],
82
+ numClasses=config_dict['numClasses'],
83
+ dropout=0.1
84
+ )
85
+
86
+ state_dict = torch.load(weights_path, map_location="cpu")
87
+ model.load_state_dict(state_dict)
88
+ model.eval()
89
+
90
+ tokenizer = PreTrainedTokenizerFast(
91
+ tokenizer_file=tokenizer_path,
92
+ vocab_file=vocab_path
93
+ )
94
+
95
+ label_map = {
96
+ 0: "positive",
97
+ 1: "negative",
98
+ 2: "neutral"
99
+ }
100
+
101
+ texts = [
102
+ "Я люблю тебя",
103
+ "Мне совсем не понравился этот фильм",
104
+ "Кличка моей кошки - Ирис"
105
+ ]
106
+
107
+ for text in texts:
108
+ inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True, max_length=config_dict['maxLong'])
109
+ with torch.no_grad():
110
+ logits = model(inputs['input_ids'])
111
+ pred_idx = torch.argmax(logits, dim=1).item()
112
+ pred_label = label_map[pred_idx]
113
+ print(f"Текст: {text}")
114
+ print(f"Предсказанный класс: {pred_label} ({pred_idx})")