---
license: apache-2.0
---
```python
import torch
import torch.nn as nn
from transformers import PreTrainedTokenizerFast
from huggingface_hub import hf_hub_download

repo_id = "MarkProMaster229/ClassificationSmall"

# Download the checkpoint and tokenizer files from the Hub (cached locally).
weights_path = hf_hub_download(repo_id=repo_id, filename="model_weights.pth")
tokenizer_path = hf_hub_download(repo_id=repo_id, filename="tokenizer.json")
vocab_path = hf_hub_download(repo_id=repo_id, filename="vocab.txt")
class TransformerBlock(nn.Module):
    def __init__(self, sizeVector=256, numHeads=8, dropout=0.5):
        super().__init__()
        self.ln1 = nn.LayerNorm(sizeVector)
        self.attn = nn.MultiheadAttention(sizeVector, numHeads, batch_first=True)
        # NOTE: the feed-forward sub-layer is elided in the source; the
        # definition below is an assumed, standard reconstruction.
        self.ln2 = nn.LayerNorm(sizeVector)
        self.ff = nn.Sequential(
            nn.Linear(sizeVector, 4 * sizeVector),
            nn.GELU(),
            nn.Dropout(dropout),
            nn.Linear(4 * sizeVector, sizeVector),
        )

    def forward(self, x):
        # Pre-norm residual attention, then the feed-forward block.
        h = self.ln1(x)
        x = x + self.attn(h, h, h)[0]
        x = x + self.ff(self.ln2(x))
        return x
class TransformerRun(nn.Module):
    def __init__(self, vocabSize=120000, maxLen=100, sizeVector=256, numBlocks=4, numHeads=8, numClasses=3, dropout=0.5):
        super().__init__()
        self.token_emb = nn.Embedding(vocabSize, sizeVector)
        self.pos_emb = nn.Embedding(maxLen, sizeVector)
        # NOTE: the layers and most of the forward pass are elided in the
        # source; the reconstruction below is an assumption consistent with
        # the pooling and classification lines that are shown.
        self.blocks = nn.ModuleList(
            [TransformerBlock(sizeVector, numHeads, dropout) for _ in range(numBlocks)]
        )
        self.ln = nn.LayerNorm(2 * sizeVector)
        self.dropout = nn.Dropout(dropout)
        self.classifier = nn.Linear(2 * sizeVector, numClasses)

    def forward(self, input_ids):
        positions = torch.arange(input_ids.size(1), device=input_ids.device)
        x = self.token_emb(input_ids) + self.pos_emb(positions)
        for block in self.blocks:
            x = block(x)
        # Dual pooling: first-token state plus mean over the sequence.
        cls_token = x[:, 0]
        mean_pool = x.mean(dim=1)
        combined = torch.cat([cls_token, mean_pool], dim=1)
        combined = self.ln(self.dropout(combined))
        logits = self.classifier(combined)
        return logits
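# Quick sanity check (illustrative addition, not from the original snippet):
# with the defaults above, a batch of token ids yields one logit per class:
#   TransformerRun()(torch.randint(0, 120000, (2, 100))).shape  # -> (2, 3)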
# Architecture hyperparameters; these must match the checkpoint, or
# load_state_dict() below will fail with shape mismatches.
config_dict = {
    'vocabSize': 119547,
    'maxLong': 100,
    'sizeVector': 256,
    'numLayers': 4,
    'numHeads': 8,
    'numClasses': 3
}

model = TransformerRun(
    vocabSize=config_dict['vocabSize'],
    maxLen=config_dict['maxLong'],
    sizeVector=config_dict['sizeVector'],
    numBlocks=config_dict['numLayers'],
    numHeads=config_dict['numHeads'],
    numClasses=config_dict['numClasses'],
    dropout=0.1
)
state_dict = torch.load(weights_path, map_location="cpu")
model.load_state_dict(state_dict)
model.eval()  # disable dropout for inference

tokenizer = PreTrainedTokenizerFast(
    tokenizer_file=tokenizer_path,
    vocab_file=vocab_path
)
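# Note: the model is called with input_ids only (see the loop below); the
# attention_mask returned by the tokenizer is not passed in, so padding
# tokens may influence the mean-pooled features.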
+
|
| 95 |
+
label_map = {
|
| 96 |
+
0: "positive",
|
| 97 |
+
1: "negative",
|
| 98 |
+
2: "neutral"
|
| 99 |
+
}
|
| 100 |
+
|
| 101 |
+
texts = [
|
| 102 |
+
"Я люблю тебя",
|
| 103 |
+
"Мне совсем не понравился этот фильм",
|
| 104 |
+
"Кличка моей кошки - Ирис"
|
| 105 |
+
]
|
for text in texts:
    inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True, max_length=config_dict['maxLong'])
    with torch.no_grad():
        logits = model(inputs['input_ids'])
    pred_idx = torch.argmax(logits, dim=1).item()
    pred_label = label_map[pred_idx]
    print(f"Text: {text}")
    print(f"Predicted class: {pred_label} ({pred_idx})")
```
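
The loop above scores one text at a time. A minimal batched variant is sketched below; it reuses the `tokenizer`, `model`, `config_dict`, and `label_map` objects from the snippet above and assumes the model tolerates the pad tokens that batching introduces (it receives no attention mask):

```python
# Batched inference sketch (an illustration, not part of the original snippet):
# tokenize all texts at once and classify them in a single forward pass.
inputs = tokenizer(texts, return_tensors="pt", padding=True,
                   truncation=True, max_length=config_dict['maxLong'])
with torch.no_grad():
    logits = model(inputs['input_ids'])  # shape: (len(texts), numClasses)
for text, idx in zip(texts, torch.argmax(logits, dim=1).tolist()):
    print(f"Text: {text}")
    print(f"Predicted class: {label_map[idx]} ({idx})")
```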