| from transformers import PretrainedConfig |
| from nltk.corpus import stopwords |
| from typing import List |
| import nltk |
# Fetch the NLTK data packages when this module is imported.
# 'stopwords' is the corpus read by the stopwords.words('english') call in
# this file. No use of 'punkt' is visible in this file -- presumably it is
# needed by tokenization code elsewhere; TODO confirm before removing.
| nltk.download('stopwords') |
| nltk.download('punkt') |
|
|
class GZIPEmbeddingConfig(PretrainedConfig):
    """Configuration object for the ``"gzipembed"`` model type.

    Each named argument is stored on the instance under the same name and
    every remaining keyword argument is forwarded to ``PretrainedConfig``.
    How the options are interpreted is decided by the model code that
    consumes this config, which is not part of this class.

    Args:
        normalize: Boolean flag, ``True`` by default. Presumably toggles
            normalization of the produced embeddings -- confirm against the
            model implementation.
        normalized_corpus: Boolean flag, ``True`` by default. Presumably
            toggles normalization of the stored corpus -- confirm against
            the model implementation.
        reduction: Boolean flag, ``False`` by default. Presumably enables
            dimensionality reduction -- confirm against the model
            implementation.
        reduced_dimension: Stored as given (default ``0``). Presumably the
            target size used when ``reduction`` is enabled -- confirm.
        remove_stop_words: Boolean flag, ``True`` by default. Presumably
            controls whether ``stop_words`` are filtered out -- confirm.
        stop_words: List of stop words. ``None`` (the default) selects a
            fresh copy of NLTK's English stop-word list.
        corpus: Corpus entries. ``None`` (the default) selects a new empty
            list.
        **kwargs: Passed through unchanged to ``PretrainedConfig.__init__``.
    """

    model_type = "gzipembed"

    def __init__(
        self,
        normalize: bool = True,
        normalized_corpus: bool = True,
        reduction: bool = False,
        reduced_dimension: int = 0,
        remove_stop_words: bool = True,
        stop_words=None,
        corpus=None,
        **kwargs,
    ):
        # ``None`` sentinels replace the former list-valued defaults. Those
        # were created once, when the function was defined, so every config
        # built without explicit values shared (and could mutate) one list.
        self.corpus = [] if corpus is None else corpus
        self.normalize = normalize
        self.normalized_corpus = normalized_corpus
        self.reduction = reduction
        # No trailing comma here: with one, the attribute silently became
        # the 1-tuple ``(reduced_dimension,)`` instead of the value itself.
        self.reduced_dimension = reduced_dimension
        self.remove_stop_words = remove_stop_words
        # Looked up per instance so each config owns its own list.
        self.stop_words = (
            stopwords.words('english') if stop_words is None else stop_words
        )
        # Attributes are assigned before delegating, as in the original.
        super().__init__(**kwargs)
|
|