Spaces:
Build error
Build error
| import torch | |
| from torch import nn | |
| from torch.nn import init | |
| from transformers import AutoTokenizer, AutoModel | |
| class ProdFeatureEncoder(nn.Module): | |
| """ | |
| Model for creating embeddings with pre-trained ruBERT-tiny BERT. | |
| Attributes: | |
| config (object): Configuration object containing model hyperparameters. | |
| tokenizer (AutoTokenizer): Tokenizer instance for ruBERT-tiny. | |
| model (AutoModel): Pre-trained ruBERT-tiny model instance. | |
| fc (nn.Linear): Linear layer for dimensionality reduction. | |
| """ | |
| def __init__(self, config): | |
| """ | |
| Initializes the ProdFeatureEncoder model. | |
| Args: | |
| config (object): Configuration object containing model hyperparameters. | |
| """ | |
| super().__init__() | |
| self.config = config | |
| self.tokenizer = AutoTokenizer.from_pretrained("cointegrated/rubert-tiny") | |
| self.model = AutoModel.from_pretrained("cointegrated/rubert-tiny") | |
| self.fc = nn.Linear(self.config.bert_output_size, self.config.embedding_size) | |
| init.xavier_uniform_(self.fc.weight) | |
| self.norm = nn.LayerNorm(self.config.embedding_size) | |
| def forward(self, text: str): | |
| """ | |
| Creates an embedding for the input text. | |
| Args: | |
| text (str): Input text to create an embedding for. | |
| Returns: | |
| torch.Tensor: Embedding vector for the input text. | |
| """ | |
| tokens = self.tokenizer(text, padding=True, truncation=True, return_tensors='pt') | |
| model_output = self.model(**{k: v.to(self.model.device) for k, v in tokens.items()}) | |
| embedding = model_output.last_hidden_state[:, 0, :] | |
| embedding = self.fc(embedding) | |
| return embedding[0] | |